Re: x86/kdump: crashkernel=X try to reserve below 896M first then below 4G and MAXMEM

2017-10-21 Thread Yinghai Lu
On Thu, Oct 19, 2017 at 10:52 PM, Dave Young  wrote:
> Now crashkernel=X will fail if there's not enough memory at low region
> (below 896M) when trying to reserve large memory size.  One can use
> crashkernel=xM,high to reserve it at high region (>4G) but it is more
> convenient to improve crashkernel=X to:
>
>  - First try to reserve X below 896M (for being compatible with old
>    kexec-tools).
>  - If fails, try to reserve X below 4G (swiotlb need to stay below 4G).
>  - If fails, try to reserve X from MAXMEM top down.
>
> It's more transparent and user-friendly.

ok with me.

But it looks like Vivek did not like this idea last time.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH] memmap: Parse "Reserved" together with "reserved"

2017-04-25 Thread Yinghai Lu
On x86, recent kernels that include
 commit 640e1b38b0 ("x86/boot/e820: Basic cleanup of e820.c")
report "Reserved" instead of "reserved" in /sys/firmware/memmap and /proc/iomem.

So add handling for the capitalized spelling as well.

Signed-off-by: Yinghai Lu <ying...@kernel.org>

---
 kexec/arch/i386/crashdump-x86.c |2 ++
 kexec/arch/ia64/kexec-ia64.c|2 ++
 kexec/arch/mips/kexec-mips.c|2 ++
 kexec/firmware_memmap.c |2 ++
 4 files changed, 8 insertions(+)

Index: kexec-tools/kexec/arch/i386/crashdump-x86.c
===
--- kexec-tools.orig/kexec/arch/i386/crashdump-x86.c
+++ kexec-tools/kexec/arch/i386/crashdump-x86.c
@@ -323,6 +323,8 @@ static int get_crash_memory_ranges(struc
type = RANGE_PMEM;
} else if(memcmp(str,"reserved\n",9) == 0 ) {
type = RANGE_RESERVED;
+   } else if(memcmp(str,"Reserved\n",9) == 0 ) {
+   type = RANGE_RESERVED;
} else if (memcmp(str, "GART\n", 5) == 0) {
gart_start = start;
gart_end = end;
Index: kexec-tools/kexec/arch/ia64/kexec-ia64.c
===
--- kexec-tools.orig/kexec/arch/ia64/kexec-ia64.c
+++ kexec-tools/kexec/arch/ia64/kexec-ia64.c
@@ -117,6 +117,8 @@ int get_memory_ranges(struct memory_rang
}
else if (memcmp(str, "reserved\n", 9) == 0) {
type = RANGE_RESERVED;
+   else if (memcmp(str, "Reserved\n", 9) == 0) {
+   type = RANGE_RESERVED;
}
else if (memcmp(str, "Crash kernel\n", 13) == 0) {
/* Redefine the memory region boundaries if kernel
Index: kexec-tools/kexec/arch/mips/kexec-mips.c
===
--- kexec-tools.orig/kexec/arch/mips/kexec-mips.c
+++ kexec-tools/kexec/arch/mips/kexec-mips.c
@@ -57,6 +57,8 @@ int get_memory_ranges(struct memory_rang
type = RANGE_RAM;
} else if (memcmp(str, "reserved\n", 9) == 0) {
type = RANGE_RESERVED;
+   } else if (memcmp(str, "Reserved\n", 9) == 0) {
+   type = RANGE_RESERVED;
} else {
continue;
}
Index: kexec-tools/kexec/firmware_memmap.c
===
--- kexec-tools.orig/kexec/firmware_memmap.c
+++ kexec-tools/kexec/firmware_memmap.c
@@ -164,6 +164,8 @@ static int parse_memmap_entry(const char
range->type = RANGE_RESERVED;
else if (strcmp(type, "reserved") == 0)
range->type = RANGE_RESERVED;
+   else if (strcmp(type, "Reserved") == 0)
+   range->type = RANGE_RESERVED;
else if (strcmp(type, "ACPI Non-volatile Storage") == 0)
range->type = RANGE_ACPI_NVS;
else if (strcmp(type, "Uncached RAM") == 0)

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [tip:x86/boot] x86/boot/e820: Basic cleanup of e820.c

2017-04-25 Thread Yinghai Lu
On Tue, Apr 11, 2017 at 12:37 AM, tip-bot for Ingo Molnar
 wrote:
> Commit-ID:  640e1b38b00550990cecd809021cd37716e45922
> Gitweb: http://git.kernel.org/tip/640e1b38b00550990cecd809021cd37716e45922
> Author: Ingo Molnar 
> AuthorDate: Sat, 28 Jan 2017 11:13:08 +0100
> Committer:  Ingo Molnar 
> CommitDate: Sat, 28 Jan 2017 14:42:27 +0100
>

> x86/boot/e820: Basic cleanup of e820.c

> @@ -951,49 +924,42 @@ void __init finish_e820_parsing(void)
>  static const char *__init e820_type_to_string(int e820_type)
>  {
> switch (e820_type) {
> -   case E820_RESERVED_KERN:
> -   case E820_RAM:  return "System RAM";
> -   case E820_ACPI: return "ACPI Tables";
> -   case E820_NVS:  return "ACPI Non-volatile Storage";
> -   case E820_UNUSABLE: return "Unusable memory";
> -   case E820_PRAM: return "Persistent Memory (legacy)";
> -   case E820_PMEM: return "Persistent Memory";
> -   default:return "reserved";
> +   case E820_RESERVED_KERN: /* Fall-through: */
> +   case E820_RAM:   return "System RAM";
> +   case E820_ACPI:  return "ACPI Tables";
> +   case E820_NVS:   return "ACPI Non-volatile Storage";
> +   case E820_UNUSABLE:  return "Unusable memory";
> +   case E820_PRAM:  return "Persistent Memory (legacy)";
> +   case E820_PMEM:  return "Persistent Memory";
> +   default: return "Reserved";
> }
>  }
>
...

Hi Ingo,

The reserved ==> Reserved change causes kexec warnings:

Unknown type (Reserved) while parsing /sys/firmware/memmap/18/type.
Please report this as bug. Using RANGE_RESERVED now.
Unknown type (Reserved) while parsing /sys/firmware/memmap/16/type.
Please report this as bug. Using RANGE_RESERVED now.
Unknown type (Reserved) while parsing /sys/firmware/memmap/14/type.
Please report this as bug. Using RANGE_RESERVED now.
Unknown type (Reserved) while parsing /sys/firmware/memmap/22/type.
Please report this as bug. Using RANGE_RESERVED now.
Unknown type (Reserved) while parsing /sys/firmware/memmap/9/type.
Please report this as bug. Using RANGE_RESERVED now.
add_buffer: base:43fff6000 bufsz:80e0 memsz:a000
add_buffer: base:43fff1000 bufsz:44ce memsz:44ce
add_buffer: base:43c00 bufsz:f4c5c0 memsz:3581000
add_buffer: base:439d0d000 bufsz:22f2060 memsz:22f2060
add_buffer: base:43fff bufsz:70 memsz:70
add_buffer: base:43ffef000 bufsz:230 memsz:230
10:~/k # cat /sys/firmware/memmap/14/type
Reserved

Also, /proc/iomem has that change too.

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/2] x86/mm/ident_map: Add PUD level 1GB page support

2017-04-25 Thread Yinghai Lu
On Tue, Apr 25, 2017 at 2:13 AM, Xunlei Pang  wrote:
> The current kernel_ident_mapping_init() creates the identity
> mapping using 2MB page(PMD level), this patch adds the 1GB
> page(PUD level) support.
>
> This is useful on large machines to save some reserved memory
> (as paging structures) in the kdump case when kexec sets up
> identity mappings before booting into the new kernel.
>
> We will utilize this new support in the following patch.
>
> Signed-off-by: Xunlei Pang 
> ---
>  arch/x86/boot/compressed/pagetable.c |  2 +-
>  arch/x86/include/asm/init.h  |  3 ++-
>  arch/x86/kernel/machine_kexec_64.c   |  2 +-
>  arch/x86/mm/ident_map.c  | 13 -
>  arch/x86/power/hibernate_64.c|  2 +-
>  5 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/pagetable.c 
> b/arch/x86/boot/compressed/pagetable.c
> index 56589d0..1d78f17 100644
> --- a/arch/x86/boot/compressed/pagetable.c
> +++ b/arch/x86/boot/compressed/pagetable.c
> @@ -70,7 +70,7 @@ static void *alloc_pgt_page(void *context)
>   * Due to relocation, pointers must be assigned at run time not build time.
>   */
>  static struct x86_mapping_info mapping_info = {
> -   .pmd_flag   = __PAGE_KERNEL_LARGE_EXEC,
> +   .page_flag   = __PAGE_KERNEL_LARGE_EXEC,
>  };
>
>  /* Locates and clears a region for a new top level page table. */
> diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
> index 737da62..46eab1a 100644
> --- a/arch/x86/include/asm/init.h
> +++ b/arch/x86/include/asm/init.h
> @@ -4,8 +4,9 @@
>  struct x86_mapping_info {
> void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
> void *context;   /* context for alloc_pgt_page */
> -   unsigned long pmd_flag;  /* page flag for PMD entry */
> +   unsigned long page_flag; /* page flag for PMD or PUD entry */
> unsigned long offset;/* ident mapping offset */
> +   bool use_pud_page;  /* PUD level 1GB page support */

How about using direct_gbpages instead?
use_pud_page is confusing.

>  };
>
>  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
> diff --git a/arch/x86/kernel/machine_kexec_64.c 
> b/arch/x86/kernel/machine_kexec_64.c
> index 085c3b3..1d4f2b0 100644
> --- a/arch/x86/kernel/machine_kexec_64.c
> +++ b/arch/x86/kernel/machine_kexec_64.c
> @@ -113,7 +113,7 @@ static int init_pgtable(struct kimage *image, unsigned 
> long start_pgtable)
> struct x86_mapping_info info = {
> .alloc_pgt_page = alloc_pgt_page,
> .context= image,
> -   .pmd_flag   = __PAGE_KERNEL_LARGE_EXEC,
> +   .page_flag  = __PAGE_KERNEL_LARGE_EXEC,
> };
> unsigned long mstart, mend;
> pgd_t *level4p;
> diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
> index 04210a2..0ad0280 100644
> --- a/arch/x86/mm/ident_map.c
> +++ b/arch/x86/mm/ident_map.c
> @@ -13,7 +13,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, 
> pmd_t *pmd_page,
> if (pmd_present(*pmd))
> continue;
>
> -   set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
> +   set_pmd(pmd, __pmd((addr - info->offset) | info->page_flag));
> }
>  }
>
> @@ -30,6 +30,17 @@ static int ident_pud_init(struct x86_mapping_info *info, 
> pud_t *pud_page,
> if (next > end)
> next = end;
>
> +   if (info->use_pud_page) {
> +   pud_t pudval;
> +
> +   if (pud_present(*pud))
> +   continue;
> +
> +   pudval = __pud((addr - info->offset) | 
> info->page_flag);
> +   set_pud(pud, pudval);

The address should be masked with PUD_MASK first:
   addr &= PUD_MASK;
   set_pud(pud, __pud((addr - info->offset) | info->page_flag));



> +   continue;
> +   }
> +
> if (pud_present(*pud)) {
> pmd = pmd_offset(pud, 0);
> ident_pmd_init(info, pmd, addr, next);
> diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
> index 6a61194..a6e21fe 100644
> --- a/arch/x86/power/hibernate_64.c
> +++ b/arch/x86/power/hibernate_64.c
> @@ -104,7 +104,7 @@ static int set_up_temporary_mappings(void)
>  {
> struct x86_mapping_info info = {
> .alloc_pgt_page = alloc_pgt_page,
> -   .pmd_flag   = __PAGE_KERNEL_LARGE_EXEC,
> +   .page_flag  = __PAGE_KERNEL_LARGE_EXEC,
> .offset = __PAGE_OFFSET,
> };
> unsigned long mstart, mend;
> --
> 1.8.3.1
>

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v2 1/2] kexec: Introduce "/sys/kernel/kexec_crash_low_size"

2016-08-23 Thread Yinghai Lu
On Wed, Aug 17, 2016 at 1:20 AM, Dave Young  wrote:
> On 08/17/16 at 09:50am, Xunlei Pang wrote:
>> "/sys/kernel/kexec_crash_size" only handles crashk_res, it
>> is fine in most cases, but sometimes we have crashk_low_res.
>> For example, when "crashkernel=size[KMG],high" combined with
>> "crashkernel=size[KMG],low" is used for 64-bit x86.
>>
>> Like crashk_res, we introduce the corresponding sysfs file
>> "/sys/kernel/kexec_crash_low_size" for crashk_low_res.
>>
>> So, the exact total reserved memory is the sum of the two.
>>
>> crashk_low_res can also be shrunk via this new interface,
>> and users should be aware of what they are doing.
...
>> @@ -218,6 +238,7 @@ static struct attribute * kernel_attrs[] = {
>>  #ifdef CONFIG_KEXEC_CORE
>>   _loaded_attr.attr,
>>   _crash_loaded_attr.attr,
>> + _crash_low_size_attr.attr,
>>   _crash_size_attr.attr,
>>   _attr.attr,
>>  #endif

It would be better to use the attribute_group .is_visible callback to show
crash_low_size only when crash_base is above 4G.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: commit 5edcbfd1368e break kexec on x86-64

2015-05-04 Thread Yinghai Lu
On Mon, May 4, 2015 at 8:56 AM, Aníbal Limón
anibal.li...@linux.intel.com wrote:
 Hi Yinghai,

 I'm trying to reproduce the issue that you found but I can't. Are there any
 options that you use?

Can you compile current upstream kernel?

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


commit 5edcbfd1368e break kexec on x86-64

2015-04-29 Thread Yinghai Lu
with

commit 5edcbfd1368e84fce913ceeeca7b712c524dc20d
Author: Aníbal Limón anibal.li...@linux.intel.com
Date:   Thu Mar 26 16:19:58 2015 +

x86_64: Add support to build kexec-tools with x32 ABI


after building kexec on openSUSE 13.1 64-bit, when using kexec to load a
kernel, I now get

overflow in relocation type R_X86_64_32 val 21dffc020

After reverting that commit and the one after it, kexec works again.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86, kdump: crashkernel=X try to reserve below 896M first, then try below 4G, then MAXMEM

2013-10-24 Thread Yinghai Lu
On Mon, Oct 21, 2013 at 8:16 AM, Vivek Goyal vgo...@redhat.com wrote:
 On Fri, Oct 18, 2013 at 10:45:43PM -0700, Yinghai Lu wrote:


 IIUC, you are trying to say that with new kernel old kexec-tools will fail
 at a different failure point. I don't see why that is a problem. It still
 fails.

Yes, that could cause confusion.

We already know it would most likely fail later, so we should make
it skip the allocation while the first kernel is booting.


 [..]
  You are not thinking about ease of use here for existing users.

 most existing user don't need to do anything. just with new kernel and
 old kexec tools.

 those system that did not work kexec before because XM is too big, they have 
 to
 update kexec tools, and use ,high

 Make it simple, less error.

 No, it is not that simple. Think from a distribution's perspective also.
 We have the logic to scale reserved memory based on physical memory
 present in the system. Now we are seeing bigger memory systems (which
 would not have worked in the past). We still want to retain the existing
 logic and not switch to crashkernel=x,high. One does not have to. It
 makes life simpler.

distribution should go with ,high for 64 bit kernel and new kexec-tools.
for 32bit kernel, you still can have ,high or not, as ,high is ignored.



 Same logic working both with smaller memory systems as well as large memory
 systems. One should not have to choose a different command line because
 there is more physical RAM present in the system.

,high works even on smaller memory systems.



 We already support above 4G, what is point for trying below 4G?

 Because it is not *required* to reserve memory above 4G. Because we want
 same command line to work with both small memory systems as well as
 large memory systems and we don't care whether memory is reserved below
 4G or above 4G. What does matter though that we don't have to worry about
 switching command line option if it is large memory system.

,high will work smaller or large memory system after you install new
kexec tools.

Again, for distribution, when new kernel is added, new kernel will all
have ,high
and new kexec-tools get installed.

Even if we want to extend crashkernel=XM, I would like to have
it behave identically to crashkernel=XM,high instead.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86, kdump: crashkernel=X try to reserve below 896M first, then try below 4G, then MAXMEM

2013-10-24 Thread Yinghai Lu
On Mon, Oct 14, 2013 at 4:46 AM, WANG Chao chaow...@redhat.com wrote:
 Now crashkernel=X will fail out if there's not enough memory at
 low (below 896M). What makes sense for crashkernel=X would be:

  - First try to reserve X below 896M (for being compatible with old
kexec-tools).
  - If fails, try to reserve X below 4G (swiotlb need to stay below 4G).
  - If fails, try to reserve X from MAXMEM top down.

 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
 index f0de629..38e6c1f 100644
 --- a/arch/x86/kernel/setup.c
 +++ b/arch/x86/kernel/setup.c
 @@ -593,6 +593,20 @@ static void __init reserve_crashkernel(void)
 high ? CRASH_KERNEL_ADDR_HIGH_MAX :
CRASH_KERNEL_ADDR_LOW_MAX,
 crash_size, alignment);
 +   /*
 +* crashkernel=X reserve below 896M fails? Try below 4G
 +*/
 +   if (!high && !crash_base)
 +   crash_base = memblock_find_in_range(alignment,
 +   (1ULL << 32),
 +   crash_size, alignment);

Another problem: on 32-bit it would allocate a range in [0,4G)
if the user has crashkernel=512M or more.

 +   /*
 +* crashkernel=X reserve below 4G fails? Try MAXMEM
 +*/
 +   if (!high && !crash_base)
 +   crash_base = memblock_find_in_range(alignment,
 +   CRASH_KERNEL_ADDR_HIGH_MAX,
 +   crash_size, alignment);

 if (!crash_base) {
 pr_info("crashkernel reservation failed - No suitable area found.\n");


Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86, kdump: crashkernel=X try to reserve below 896M first, then try below 4G, then MAXMEM

2013-10-24 Thread Yinghai Lu
On Thu, Oct 24, 2013 at 4:01 AM, WANG Chao chaow...@redhat.com wrote:


 I think crashkernel=XM,high is really supposed to be used when user indeed
 want to reserve from high.

No. Keep all 64-bit systems using ,high to keep things simple,
instead of having some use low and some use high.


 Like Vivek said, failing at different point shouldn't be a problem.
 That's an incorrect configuration. When crashkernel=1G,high, old
 kexec-tools still fails the same way. That could cause confusion, in
 your word.

If it would fail later, we should let it fail as early as possible.


 Let me put it in an example, a user want to utilize this new kernel
 feature to reserve 1G for crash kernel but not upgrade kexec-tools,

 - W/o this patch:
  First he would try crashkernel=1G, but failed to reserve. Second time,
  he goes with crashkernel=1G,high, reservation is fine but kexec fails
  to load. Upgrading kexec-tools is essential to him.

 - W/ this patch:
   First he would try crashkernel=1G, reservation is ok but kexec fails
   to load the same way as the case of crashkernel=1G,high. Upgrading
   kexec-tools is essential to him.

 The point is old kexec-tools can't load high, no matter by what kind of
 crashkernel cmdline to reserve at high.

Old kexec-tools could work across 896M in some cases.
That will confuse the user, as it works some of the time on some setups, but does
not work on other setups.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86, kdump: crashkernel=X try to reserve below 896M first, then try below 4G, then MAXMEM

2013-10-24 Thread Yinghai Lu
On Thu, Oct 24, 2013 at 7:02 AM, Vivek Goyal vgo...@redhat.com wrote:
 On Wed, Oct 23, 2013 at 11:11:51PM -0700, Yinghai Lu wrote:

 Hence both crashkernel=xM and crashkernel=XM,high have their own usage.
 We have been using crashkernel=xM and we know it works. So extending it
 to be able to allocate memory from higher regions, if sufficient memory
 is not available in lower regions makes sense. Memory reservation below
 4G is more efficient due to not requiring swiotlb. And crashkernel=xM
 has been working for us and users are familiar with it.

 So I don't see a point that why would you try to block any move to
 extend crashkernel=xM semantics.

Make the thing simple.
Keep them separately, leave crashkernel=xM to old kexec-tools mostly
and keep crashkernel=xM,high to newer kexec-tools as needed.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86, kdump: crashkernel=X try to reserve below 896M first, then try below 4G, then MAXMEM

2013-10-24 Thread Yinghai Lu
On Thu, Oct 24, 2013 at 12:18 PM, Vivek Goyal vgo...@redhat.com wrote:
 On Thu, Oct 24, 2013 at 12:15:25PM -0700, Yinghai Lu wrote:

 Also keeping things simple by not trying to *impose* a new crashkernel=
 syntax on existing crashkernel=xM users.

Existing user that have crashkernel=xM working with their old kernel
and old kexec-tools, they still could keep their old command line and
old kexec-tools
with new updated kernel.
We should not change semantics to surprise them.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86, kdump: crashkernel=X try to reserve below 896M first, then try below 4G, then MAXMEM

2013-10-24 Thread Yinghai Lu
On Thu, Oct 24, 2013 at 12:27 PM, Vivek Goyal vgo...@redhat.com wrote:
 On Thu, Oct 24, 2013 at 12:24:57PM -0700, Yinghai Lu wrote:
 On Thu, Oct 24, 2013 at 12:18 PM, Vivek Goyal vgo...@redhat.com wrote:
  On Thu, Oct 24, 2013 at 12:15:25PM -0700, Yinghai Lu wrote:
 
  Also keeping things simple by not trying to *impose* a new crashkernel=
  syntax on existing crashkernel=xM users.

 Existing user that have crashkernel=xM working with their old kernel
 and old kexec-tools, they still could keep their old command line and
 old kexec-tools
 with new updated kernel.
 We should not change semantics to surprise them.

 Old users will get reservation still below 896MB.

 It will go above 896MB only if memory could not be allocated below 896MB.

 In the past reservation will fail and kexec-tools will fail.
 Now reservation will succeed but kexec-tools will fail.

 So end result a user sees is that kexec-tools fails. So I don't see how
 we are breaking existing installations or user setups.

One case could be: the user adds more memory and more PCIe cards, so the
second kernel needs more RAM and otherwise hits OOM.
The user would then just increase crashkernel=512M to crashkernel=1G.

Without Cong's patch, the kernel will fail to reserve, and the user would dig
in, change it
to crashkernel=1G,high, and update kexec-tools.

With Cong's patch, the kernel will reserve another range, e.g. between 896M
and 4G, and old kexec-tools will either
fail to load the second kernel, hang in purgatory or in the early stage of the
second kernel, or show other unknown behavior.

I would think the first path is much clearer and more predictable.

If my memory is right, HPA did not like the idea that we try below 896M,
then under 4G, and then above 4G,
and wanted us to have the ,high solution. Not sure if he would change his mind.
Also, you will need an #ifdef CONFIG_X86_64 around the 896M/4G searching code
so that it will not confuse the
32-bit arch.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86, kdump: crashkernel=X try to reserve below 896M first, then try below 4G, then MAXMEM

2013-10-18 Thread Yinghai Lu
On Fri, Oct 18, 2013 at 5:38 AM, Vivek Goyal vgo...@redhat.com wrote:
 On Thu, Oct 17, 2013 at 08:50:07PM -0700, Yinghai Lu wrote:

 [..]
  Previously high reservation (reservation above 896M) will anyway fail. So
  instead of failing, if we try reservation in higher memory areas why that
  would break old kexec-tools?

 If the old kexec-tools would fail, we should let them fail as early as
 possible; do not reserve in the first place.

 A user does not care if they get the message "memory not reserved" or if
 they get the message that could not find a suitable memory hole at
 address X.

you are asking for trouble.

Now we have two paths:
1. old kernel with old kexec tools. crashkernel=XM, work,
the new kernel with old kexec tools still working with crashkernel=XM
2. old kernel with old kernel tools, crashkernel=XM, not working.
as X is too big.
then user update to new kernel AND new kexec-tools, and crashkernel=XM,high

With this patch, you will need to test the new kernel with all old kexec-tools
to make sure
they fail later (instead of failing early) so that users are reminded to update kexec-tools.
It would also make users guess and try to get the new kernel to work with old
kexec-tools.



 
  IOW, previously anyway kexec-tools will not work as no memory will be
  reserved in higher memory area. Now memory will be reserved but old
  kexec-tools should fail as it can't load in that area.
 
  If that works, then one would use crashkernel=X,high only if he is
  particular that memory reservation comes from area above 4G (despite
  the fact that same memory could have been reserved below 4G too).

 My point:
 Push users to use ,high as much as possible, so that we only have to handle one
 path eventually.
 For old kernels, let them use the old grammar; there is no need to change it.

 I don't understand this. Why we should push users to use ,high syntax.
 That is an option. Those who want to use it, should use it.

 We have been using crashkernel=XM for a long time now. And it makes sense
 to extend this option to be able to reserve memory at higher addresses
 if asked memory is not available at lower addresses.

 You are not thinking about ease of use here for existing users.

Most existing users don't need to do anything; they just use the new kernel
with the old kexec-tools.

Those systems where kexec did not work before because XM is too big have to
update kexec-tools and use ,high.

Keep it simple, with fewer errors.



 Also boot loader should always have different entry for old kernel and
 new kernel.

 What does memory reservation location has to do with kernel entry point?
 If it is a 64bit bzImage, we will use 64bit entry point by default, isn't
 it? Does not matter whether memory is reserved above 4G or not.

 I think it makes sense that existing crashkernel=XM users be able to
 reserve memory in higher memory area if sufficient memory is not available
 below 896M or below 4G. Those who always want memory reservation above 4G
 they should use ,high syntax and enforce memory allocation above 4G.

We already support above 4G, so what is the point of trying below 4G?

You could get more bug reports about the new kernel with old kexec-tools, as
old kexec-tools could work with a range between 896M and 4G in some cases,
but not all.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86, kdump: crashkernel=X try to reserve below 896M first, then try below 4G, then MAXMEM

2013-10-17 Thread Yinghai Lu
On Tue, Oct 15, 2013 at 7:48 AM, Vivek Goyal vgo...@redhat.com wrote:
 On Mon, Oct 14, 2013 at 11:54:22AM -0700, Yinghai Lu wrote:
 On Mon, Oct 14, 2013 at 4:46 AM, WANG Chao chaow...@redhat.com wrote:
 User should change crashkernel=X to crashkernel=X,high.

 I think if we can extend old syntax of crashkernel=X, then it makes life
 really easy for users.


 Otherwise the user could forget to update kexec-tools to utilize the ,high
 feature, and kdump would then fail later.

 Previously high reservation (reservation above 896M) will anyway fail. So
 instead of failing, if we try reservation in higher memory areas why that
 would break old kexec-tools?

If the old kexec-tools would fail, we should let them fail as early as possible;
do not reserve in the first place.


 IOW, previously anyway kexec-tools will not work as no memory will be
 reserved in higher memory area. Now memory will be reserved but old
 kexec-tools should fail as it can't load in that area.

 If that works, then one would use crashkernel=X,high only if he is
 particular that memory reservation comes from area above 4G (despite
 the fact that same memory could have been reserved below 4G too).

My point:
Push users to use ,high as much as possible, so that we only have to handle one
path eventually.
For old kernels, let them use the old grammar; there is no need to change it.

Also boot loader should always have different entry for old kernel and
new kernel.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86, kdump: crashkernel=X try to reserve below 896M first, then try below 4G, then MAXMEM

2013-10-14 Thread Yinghai Lu
On Mon, Oct 14, 2013 at 4:46 AM, WANG Chao chaow...@redhat.com wrote:
 Now crashkernel=X will fail out if there's not enough memory at
 low (below 896M). What makes sense for crashkernel=X would be:

  - First try to reserve X below 896M (for being compatible with old
kexec-tools).
  - If fails, try to reserve X below 4G (swiotlb need to stay below 4G).
  - If fails, try to reserve X from MAXMEM top down.

 So that user can easily reserve large memory with crashkernel=X instead
 of crashkernel=X,high. It's more transparent and user-friendly.

 If crashkernel is large and the reserved is beyond 896M, old kexec-tools
 won't be compatible with new kernel for most of time.

 kexec will fail out immediately in this case. But the failure could be
 expected, because old kexec users should not try to reserve that large
 amount of memory at the first place.

 On the other hand, old kexec also will fail on old kernel when there's
 not enough low memory to reserve a large crash kernel area. So the
 failure of old kexec is consistent between old kernel and new kernel.

 Signed-off-by: WANG Chao chaow...@redhat.com
 ---
  arch/x86/kernel/setup.c | 14 ++
  1 file changed, 14 insertions(+)

 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
 index f0de629..38e6c1f 100644
 --- a/arch/x86/kernel/setup.c
 +++ b/arch/x86/kernel/setup.c
 @@ -593,6 +593,20 @@ static void __init reserve_crashkernel(void)
 high ? CRASH_KERNEL_ADDR_HIGH_MAX :
CRASH_KERNEL_ADDR_LOW_MAX,
 crash_size, alignment);
 +   /*
 +* crashkernel=X reserve below 896M fails? Try below 4G
 +*/
 +   if (!high && !crash_base)
 +   crash_base = memblock_find_in_range(alignment,
 +   (1ULL << 32),
 +   crash_size, alignment);
 +   /*
 +* crashkernel=X reserve below 4G fails? Try MAXMEM
 +*/
 +   if (!high && !crash_base)
 +   crash_base = memblock_find_in_range(alignment,
 +   CRASH_KERNEL_ADDR_HIGH_MAX,
 +   crash_size, alignment);

 if (!crash_base) {
 pr_info("crashkernel reservation failed - No suitable area found.\n");
 --

User should change crashkernel=X to crashkernel=X,high.

Otherwise the user could forget to update kexec-tools to utilize the ,high
feature, and kdump would then fail later.

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: reserved crash memory above #define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF

2013-04-17 Thread Yinghai Lu
On Wed, Apr 17, 2013 at 6:52 AM, Thomas Renninger tr...@suse.de wrote:
 Hi,

 while trying to switch to kvm setup with a latest kernel I realized that
 blank:
 crashkernel=64M
 param will reserve this memory area:
 7b00-7eff : Crash kernel

 and kexec does not successfully load the kernel due to (hole_max output
 added myself):
 Could not find a free area of memory of 4c1000 bytes, hole_max: 0x37ff...
 locate_hole failed

https://patchwork.kernel.org/patch/2447581/
https://patchwork.kernel.org/patch/2447561/
https://patchwork.kernel.org/patch/2447571/
https://patchwork.kernel.org/patch/2447591/

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 5/5] kexec: X86: Pass memory ranges via e820 table instead of memmap= boot parameter

2013-04-11 Thread Yinghai Lu
On Thu, Apr 11, 2013 at 5:26 AM, Thomas Renninger tr...@suse.de wrote:
 Currently ranges are passed via kernel boot parameters:
 memmap=exactmap memmap=X#Y memmap=

 Pass them via e820 table directly instead.

How do we address the kernel's references to saved_max_pfn?

The kernel needs saved_max_pfn, taken from the old e820, in
drivers/char/mem.c::read_oldmem().

mips and powerpc pass that on the command line via savemaxmem=.

Should x86 use that too?

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kdump, x86: Process multiple Crash kernel in /proc/iomem

2013-03-22 Thread Yinghai Lu
On Fri, Mar 22, 2013 at 2:21 PM, Vivek Goyal vgo...@redhat.com wrote:

  In case of kexec_on_panic, we seem to have all the memory ranges in
  info->memory_ranges[]. I guess we don't need that. We just need ranges
  which are reserved for crash kernel and marked by "Crash kernel". In
  that case we will be able to handle multiple "Crash kernel" ranges.

 but we still like to put kernel and initrd high, and leave low range
 for swiotlb.
 could just find mem_min and mem_max for largest and last one.

 Yes, but I guess that should not be hard coded here. It is during load
 phase we should enforce where we want to load all the segments.

 It will work though for our current usage. May be later we can improve
 it further. Where loader sees all the memory ranges (low and high) and
 we choose appropriate segment.

ok.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kdump, x86: Process multiple Crash kernel in /proc/iomem

2013-03-22 Thread Yinghai Lu
On Fri, Mar 22, 2013 at 2:27 PM, H. Peter Anvin h...@zytor.com wrote:
 On 03/22/2013 02:21 PM, Vivek Goyal wrote:

 BTW, I had a query about loading 64bit entry point bzImage. In 32bit
 bzImage entry point logic, we used to load bzImage at the beginning
 of memory hole and initrd at the end of memory hole. So that bzImage
 and initramfs are as far as possible and initramfs decompression does
 not overwrite anything or for that matter setting bss area does not
 spill over into initramfs.

 In new code, It looks like we seem to be loading kernel towards the end
 of the hole.

addr = add_buffer(info, kernel + kern16_size, k_size,
   size, align, 0x10, -1, -1);

 IIUC, this has potential that new kernel can overwrite some of the old
 kernel's data structure while setting up bss. Shouldn't we do it 32bit
 entry code way where bzImage is loaded towards the beginning of hole and
 initramfs is loaded towards the end of the hole.


 Since boot protocol 2.10+ the kernel actively exports how much memory it
 needs during its setup phase.


yes, we got that size that kernel needed from hdr. more lines from the new code.

/* need to use run-time size for buffer searching */
dbgprintf("kernel init_size 0x%x\n", real_mode->init_size);
size = _ALIGN(real_mode->init_size, 4096);
align = real_mode->kernel_alignment;

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: 3.9-rc1: crash kernel panic - not syncing: Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer

2013-03-09 Thread Yinghai Lu
[ Add more to To list ]

On Fri, Mar 8, 2013 at 10:24 AM, Yinghai Lu ying...@kernel.org wrote:
 On Fri, Mar 8, 2013 at 4:12 AM, WANG Chao chaow...@redhat.com wrote:

 what is 00:02.0 in your system?
 This IOMMU issue is related to https://lkml.org/lkml/2012/11/26/814. We can
 discuss this IOMMU issue in that thread.
 Anyway 00:02.0 is a video card, the box is Ivy Bridge.
 # lspci -s 00:02.0 -v
 00:02.0 VGA compatible controller: Intel Corporation 3rd Gen Core processor
 Graphics Controller (rev 09) (prog-if 00 [VGA controller])
 Subsystem: Intel Corporation Device 2211
 Flags: bus master, fast devsel, latency 0, IRQ 44
 Memory at afc0 (64-bit, non-prefetchable) [size=4M]
 Memory at c000 (64-bit, prefetchable) [size=256M]
 I/O ports at 6000 [size=64]
 Expansion ROM at unassigned [disabled]
 Capabilities: [90] MSI: Enable+ Count=1/1 Maskable- 64bit-
 Capabilities: [d0] Power Management version 2
 Capabilities: [a4] PCI Advanced Features
 Kernel driver in use: i915

 disable drm for i915 will make your iommu work with dump?



 Is it expected to intel_iommu=on or crashkernel_low to make 2nd kernel boot 
 in
 3.9? Back in 3.8, it works just fine w/ only crashkernel param.

 Yes, I really do not want to set crashkernel low range like 72M
 automatically for all.
 that would have the system with proper iommu support lose 72M under 4G
 in first kernel.
 And can not play allocate and return tricks, as first kernel have no
 idea if iommu will work
 on second kernel even iommu is working on first kernel.

 Better to fix iommu support at first.

 For old system that does not have DMAR or kernel does not have IOMMU
 support enabled, or
 user does not pass intel_iommu=on.
 We could set crashkernel low range to 72M automatically.

It seems that it is not worth checking for the case where IOMMU is not
supported in the second kernel.

Please check the attached patch, which simply sets crashkernel_low
automatically; if the system DOES support IOMMU with kdump, the user can
specify crashkernel_low=0 to save the low 72M.

Thanks

Yinghai


fix_crashkernel_low.patch
Description: Binary data
___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: 3.9-rc1: crash kernel panic - not syncing: Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer

2013-03-08 Thread Yinghai Lu
On Fri, Mar 8, 2013 at 4:12 AM, WANG Chao chaow...@redhat.com wrote:

 what is 00:02.0 in your system?
 This IOMMU issue is related to https://lkml.org/lkml/2012/11/26/814. We can
 discuss this IOMMU issue in that thread.
 Anyway 00:02.0 is a video card, the box is Ivy Bridge.
 # lspci -s 00:02.0 -v
 00:02.0 VGA compatible controller: Intel Corporation 3rd Gen Core processor
 Graphics Controller (rev 09) (prog-if 00 [VGA controller])
 Subsystem: Intel Corporation Device 2211
 Flags: bus master, fast devsel, latency 0, IRQ 44
 Memory at afc0 (64-bit, non-prefetchable) [size=4M]
 Memory at c000 (64-bit, prefetchable) [size=256M]
 I/O ports at 6000 [size=64]
 Expansion ROM at unassigned [disabled]
 Capabilities: [90] MSI: Enable+ Count=1/1 Maskable- 64bit-
 Capabilities: [d0] Power Management version 2
 Capabilities: [a4] PCI Advanced Features
 Kernel driver in use: i915

disable drm for i915 will make your iommu work with dump?



 Is it expected to intel_iommu=on or crashkernel_low to make 2nd kernel boot in
 3.9? Back in 3.8, it works just fine w/ only crashkernel param.

Yes, I really do not want to set a crashkernel low range such as 72M
automatically for everyone.
That would make systems with proper IOMMU support lose 72M under 4G
in the first kernel.
And we cannot play allocate-and-return tricks, because the first kernel
has no idea whether the IOMMU will work
in the second kernel, even if the IOMMU is working in the first kernel.

Better to fix iommu support at first.

For old systems that do not have DMAR, or where the kernel does not have
IOMMU support enabled, or
where the user does not pass intel_iommu=on,
we could set the crashkernel low range to 72M automatically.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [RFC PATCH 0/5] crash dump bitmap: scan memory pages in kernel to speedup kernel dump process

2013-03-07 Thread Yinghai Lu
On Thu, Mar 7, 2013 at 7:21 AM, Vivek Goyal vgo...@redhat.com wrote:
 Looks like now hpa and yinghai have done the work to be able to load
 kdump kernel above 4GB. I am assuming this also removes the restriction
 that we can only reserve 512MB or 896MB in second kernel.

Yes, From v3.9 and kexec-tools 2.0.4 on x86 64bit.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: 3.9-rc1: crash kernel panic - not syncing: Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer

2013-03-07 Thread Yinghai Lu
On Thu, Mar 7, 2013 at 10:03 PM, CAI Qian caiq...@redhat.com wrote:
 CC'ing kexec ML. Also mentioned that 3.8 has no such issue.

 This message looks suspicious and out of range while 3.8 reservation
 looks within the range.

 [0.00] Reserving 128MB of memory at 5216MB for crashkernel
 (System RAM: 3977MB)

 Wondering if anything to do with memblock again...

that is intended...

 - Original Message -
 From: WANG Chao chaow...@redhat.com
 To: LKML vger.kernel.org
 Cc: CAI Qian caiq...@redhat.com
 Sent: Friday, March 8, 2013 1:54:37 PM
 Subject: 3.9-rc1: crash kernel panic - not syncing: Can not allocate SWIOTLB 
 buffer earlier and can't now provide you
 with the DMA bounce buffer

 Hi, All

 On 3.9-rc1, I load crash kernel with latest kexec-tools(up to
 28d413a), but
 2nd kernel panic at early time:
 [2.948076] Kernel panic - not syncing: Can not allocate SWIOTLB
 buffer earlier and can't now provide you with the DMA bounce buffer
 [2.959958] Pid: 53, comm: khubd Not tainted 3.9.0-rc1+ #1

You need to add crashkernel_low=64M in first kernel.

As your system does not support DMA remapping.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: 3.9-rc1: crash kernel panic - not syncing: Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer

2013-03-07 Thread Yinghai Lu
On Thu, Mar 7, 2013 at 10:32 PM, Yinghai Lu ying...@kernel.org wrote:
 On Thu, Mar 7, 2013 at 10:03 PM, CAI Qian caiq...@redhat.com wrote:
 CC'ing kexec ML. Also mentioned that 3.8 has no such issue.

 This message looks suspicious and out of range while 3.8 reservation
 looks within the range.

 [0.00] Reserving 128MB of memory at 5216MB for crashkernel
 (System RAM: 3977MB)

 Wondering if anything to do with memblock again...

 that is intended...

 - Original Message -
 From: WANG Chao chaow...@redhat.com
 To: LKML vger.kernel.org
 Cc: CAI Qian caiq...@redhat.com
 Sent: Friday, March 8, 2013 1:54:37 PM
 Subject: 3.9-rc1: crash kernel panic - not syncing: Can not allocate 
 SWIOTLB buffer earlier and can't now provide you
 with the DMA bounce buffer

 Hi, All

 On 3.9-rc1, I load crash kernel with latest kexec-tools(up to
 28d413a), but
 2nd kernel panic at early time:
 [2.948076] Kernel panic - not syncing: Can not allocate SWIOTLB
 buffer earlier and can't now provide you with the DMA bounce buffer
 [2.959958] Pid: 53, comm: khubd Not tainted 3.9.0-rc1+ #1

 You need to add crashkernel_low=64M in first kernel.

 As your system does not support DMA remapping.

It looks like your system DOES have a DMAR table; please enable DMAR
remapping in your kernel config.

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: 3.9-rc1: crash kernel panic - not syncing: Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer

2013-03-07 Thread Yinghai Lu
On Thu, Mar 7, 2013 at 11:20 PM, WANG Chao chaow...@redhat.com wrote:

 looks like your system DO have DMAR table, please enable dmar
 remapping in your kernel config.

 I've already got following config:
 CONFIG_DMAR_TABLE=y
 CONFIG_INTEL_IOMMU=y
 CONFIG_IRQ_REMAP=y

 but I don't have intel_iommu=on in kernel cmdline. IIRC, iommu will prevent
 2nd kernel from booting ...

Did you put intel_iommu=on on both the first and the second kernel?

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: 3.9-rc1: crash kernel panic - not syncing: Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer

2013-03-07 Thread Yinghai Lu
On Thu, Mar 7, 2013 at 11:33 PM, WANG Chao chaow...@redhat.com wrote:
 On 03/08/2013 03:27 PM, Yinghai Lu wrote:
 On Thu, Mar 7, 2013 at 11:20 PM, WANG Chao chaow...@redhat.com wrote:

 looks like your system DO have DMAR table, please enable dmar
 remapping in your kernel config.

 I've already got following config:
 CONFIG_DMAR_TABLE=y
 CONFIG_INTEL_IOMMU=y
 CONFIG_IRQ_REMAP=y

 but I don't have intel_iommu=on in kernel cmdline. IIRC, iommu will prevent
 2nd kernel from booting ...

 Did you put intel_iommu=on on first and second cpu both?

 I tried, 2nd kernel didn't boot and kept spitting errors like these:
 [2.106939] DMAR: No ATSR found
 [2.110121] IOMMU 0 0xfed9: using Queued invalidation
 [2.115522] IOMMU 1 0xfed91000: using Queued invalidation
 [2.120919] IOMMU: Setting RMRR:
 [2.124162] IOMMU: Setting identity map for device :00:02.0 [0xab80
 - 0xaf9f]
 [2.133099] IOMMU: Setting identity map for device :00:1d.0 [0xaac95000
 - 0xaacb2fff]
 [2.141305] IOMMU: Setting identity map for device :00:1a.0 [0xaac95000
 - 0xaacb2fff]
 [2.149503] IOMMU: Setting identity map for device :00:14.0 [0xaac95000
 - 0xaacb2fff]
 [2.157690] IOMMU: Prepare 0-16MiB unity mapping for LPC
 [2.163011] IOMMU: Setting identity map for device :00:1f.0 [0x0 - 
 0xff
 [Errors, here we go]
 [2.170932] dmar: DRHD: handling fault status reg 3
 [2.170933] PCI-DMA: Intel(R) Virtualization Technology for Directed I/O
 [2.182486] dmar: DMAR:[DMA Write] Request device [00:02.0] fault addr 
 e000
 [2.182486] DMAR:[fault reason 05] PTE Write access is not set
 [2.195705] dmar: DRHD: handling fault status reg 3
 [2.200570] dmar: DMAR:[DMA Read] Request device [00:02.0] fault addr 
 ff873000
 [2.200570] DMAR:[fault reason 06] PTE Read access is not set
 [2.213618] dmar: DRHD: handling fault status reg 3

My Nehalem-EX and Westmere-EX are working with IOMMU enabled in the second kernel.

what is 00:02.0 in your system?

Is your kernel an upstream kernel or a Red Hat flavored one?

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v9 7/7] kexec, x86: handle Crash low kernel range

2013-02-21 Thread Yinghai Lu
The kernel may have a "Crash kernel low" range in /proc/iomem; kexec will
use it in the kdump kernel for DMA32.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/crashdump-x86.c |   21 +
 1 file changed, 21 insertions(+)

diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c
index 245402c..83bff5e 100644
--- a/kexec/arch/i386/crashdump-x86.c
+++ b/kexec/arch/i386/crashdump-x86.c
@@ -188,6 +188,8 @@ static struct memory_range 
crash_memory_range[CRASH_MAX_MEMORY_RANGES];
 
 /* Memory region reserved for storing panic kernel and other data. */
 static struct memory_range crash_reserved_mem;
+/* under 4G parts */
+static struct memory_range crash_reserved_low_mem;
 
 /* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to
  * create Elf headers. Keeping it separate from get_memory_ranges() as
@@ -282,6 +284,10 @@ static int get_crash_memory_ranges(struct memory_range 
**range, int *ranges,
if (exclude_region(memory_ranges, crash_reserved_mem.start,
crash_reserved_mem.end)  0)
return -1;
+   if (crash_reserved_low_mem.start 
+   exclude_region(memory_ranges, crash_reserved_low_mem.start,
+   crash_reserved_low_mem.end)  0)
+   return -1;
if (gart) {
/* exclude GART region if the system has one */
if (exclude_region(memory_ranges, gart_start, gart_end)  0)
@@ -984,6 +990,12 @@ int load_crashdump_segments(struct kexec_info *info, char* 
mod_cmdline,
return ENOCRASHKERNEL;
}
 
+   if (crash_reserved_low_mem.start) {
+   sz = crash_reserved_low_mem.end - crash_reserved_low_mem.start
++1;
+   add_memmap(memmap_p, crash_reserved_low_mem.start, sz);
+   }
+
/* Create a backup region segment to store backup data*/
if (!(info-kexec_flags  KEXEC_PRESERVE_CONTEXT)) {
sz = (info-backup_src_size + align)  ~(align - 1);
@@ -1059,5 +1071,14 @@ int is_crashkernel_mem_reserved(void)
crash_reserved_mem.end = end;
crash_reserved_mem.type = RANGE_RAM;
 
+   /* If there is no Crash low kernel, still can go on */
+   if (parse_iomem_single(Crash kernel low\n, start, end) ||
+   start == end)
+   return 1;
+
+   crash_reserved_low_mem.start = start;
+   crash_reserved_low_mem.end = end;
+   crash_reserved_low_mem.type = RANGE_RAM;
+
return 1;
 }
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v9 4/7] kexec, x86: Fix bzImage real-mode booting

2013-02-21 Thread Yinghai Lu
We need to keep space for bss and heap/stack before the command line;
otherwise command_line will be cleared by the kernel's 16-bit init code.

We also need to set the 32-bit start address in the real_mode header,
because the kernel's 16-bit code needs to jump there.

Also don't touch regs16 if --real-mode is not specified.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c |   62 +++
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index 35005c7..0756f23 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -117,6 +117,8 @@ int do_bzImage_load(struct kexec_info *info,
unsigned long kernel32_load_addr;
char *modified_cmdline;
unsigned long cmdline_end;
+   unsigned long kern16_size_needed;
+   unsigned long heap_size = 0;
 
/*
 * Find out about the file I am about to load.
@@ -208,8 +210,29 @@ int do_bzImage_load(struct kexec_info *info,
elf_rel_build_load(info, info-rhdr, purgatory, purgatory_size,
0x3000, 640*1024, -1, 0);
dbgprintf(Loaded purgatory at addr 0x%lx\n, info-rhdr.rel_addr);
+
/* The argument/parameter segment */
-   setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
+   if (real_mode_entry) {
+   /* need to include size for bss and heap etc */
+   if (setup_header.protocol_version = 0x0201)
+   kern16_size_needed = setup_header.heap_end_ptr;
+   else
+   kern16_size_needed = kern16_size + 8192; /* bss */
+   if (kern16_size_needed  kern16_size)
+   kern16_size_needed = kern16_size;
+   if (kern16_size_needed  0xfffc)
+   die(kern16_size_needed is more then 64k\n);
+   heap_size = 0xfffc - kern16_size_needed; /* less 64k */
+   heap_size = ~(0x200 - 1);
+   kern16_size_needed += heap_size;
+   } else {
+   kern16_size_needed = kern16_size;
+   /* need to bigger than size of struct bootparams */
+   if (kern16_size_needed  4096)
+   kern16_size_needed = 4096;
+   }
+   setup_size = kern16_size_needed + command_line_len +
+PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
memset(real_mode, 0, setup_size);
if (!real_mode_entry) {
@@ -275,11 +298,18 @@ int do_bzImage_load(struct kexec_info *info,
 
/* Tell the kernel what is going on */
setup_linux_bootloader_parameters(info, real_mode, setup_base,
-   kern16_size, command_line, command_line_len,
+   kern16_size_needed, command_line, command_line_len,
initrd, initrd_len);
 
+   if (real_mode_entry  real_mode-protocol_version = 0x0201) {
+   real_mode-loader_flags |= 0x80; /* CAN_USE_HEAP */
+   real_mode-heap_end_ptr += heap_size - 0x200; /*stack*/
+   }
+
/* Get the initial register values */
-   elf_rel_get_symbol(info-rhdr, entry16_regs, regs16, 
sizeof(regs16));
+   if (real_mode_entry)
+   elf_rel_get_symbol(info-rhdr, entry16_regs,
+regs16, sizeof(regs16));
elf_rel_get_symbol(info-rhdr, entry32_regs, regs32, 
sizeof(regs32));
/*
 
@@ -298,16 +328,18 @@ int do_bzImage_load(struct kexec_info *info,
/*
 * Initialize the 16bit start information.
 */
-   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base  4;
-   regs16.cs = regs16.ds + 0x20;
-   regs16.ip = 0;
-   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
-   regs16.ss = (elf_rel_get_addr(info-rhdr, stack_end) - 64*1024)  4;
-   /* XXX: Documentation/i386/boot.txt says 'sp' must equal heap_end */
-   regs16.esp = 0xFFFC;
if (real_mode_entry) {
+   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base  4;
+   regs16.cs = regs16.ds + 0x20;
+   regs16.ip = 0;
+   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
+   regs16.ss = (elf_rel_get_addr(info-rhdr, stack_end) - 
64*1024)  4;
+   /* XXX: Documentation/i386/boot.txt says 'sp' must equal 
heap_end */
+   regs16.esp = 0xFFFC;
+
printf(Starting the kernel in real mode\n);
regs32.eip = elf_rel_get_addr(info-rhdr, entry16);
+   real_mode-kernel_start = kernel32_load_addr;
}
if (real_mode_entry  kexec_debug) {
unsigned long entry16_debug, pre32, first32;
@@ -327,10 +359,14 @@ int do_bzImage_load(struct kexec_info *info,

regs32.eip = entry16_debug;
}
-   elf_rel_set_symbol(info-rhdr, entry16_regs, regs16

[PATCH v9 6/7] kexec, x86_64: Load bzImage64 above 4G

2013-02-21 Thread Yinghai Lu
Need to check xloadflags to see whether the bzImage is 64-bit and relocatable.

-v2: add kexec-bzImage64.c according to Eric.
-v3: don't need to purgatory under 2g after Eric's change to purgatory code.
-v4: use locate_hole find position first then add_buffer... suggested by Eric
 add buffer for kernel image at last to make kexec-load faster.
 use xloadflags in setup_header to tell if is bzImage64.
 remove not cross GB boundary searching.
 add --entry-32bit and --real-mode for skipping bzImage64.
-v5: add buffer with runtime size instead, so kernel could use BRK
 early and safely.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  304 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 5 files changed, 314 insertions(+), 1 deletion(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

diff --git a/kexec/arch/i386/include/arch/options.h 
b/kexec/arch/i386/include/arch/options.h
index 89dbd26..aaac731 100644
--- a/kexec/arch/i386/include/arch/options.h
+++ b/kexec/arch/i386/include/arch/options.h
@@ -29,6 +29,7 @@
 #define OPT_MOD(OPT_ARCH_MAX+7)
 #define OPT_VGA(OPT_ARCH_MAX+8)
 #define OPT_REAL_MODE  (OPT_ARCH_MAX+9)
+#define OPT_ENTRY_32BIT(OPT_ARCH_MAX+10)
 
 /* Options relevant to the architecture (excluding loader-specific ones): */
 #define KEXEC_ARCH_OPTIONS \
@@ -68,7 +69,8 @@
{ args-linux, 0, NULL, OPT_ARGS_LINUX },  \
{ args-none,  0, NULL, OPT_ARGS_NONE },   \
{ module, 1, 0, OPT_MOD },\
-   { real-mode,  0, NULL, OPT_REAL_MODE },
+   { real-mode,  0, NULL, OPT_REAL_MODE },   \
+   { entry-32bit,0, NULL, OPT_ENTRY_32BIT },
 
 #define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR
 
diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile
index 405bdf5..1cf10f9 100644
--- a/kexec/arch/x86_64/Makefile
+++ b/kexec/arch/x86_64/Makefile
@@ -13,6 +13,7 @@ x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c
 x86_64_KEXEC_SRCS_native =  kexec/arch/x86_64/kexec-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c
+x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c
 
 x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native)
 
diff --git a/kexec/arch/x86_64/kexec-bzImage64.c 
b/kexec/arch/x86_64/kexec-bzImage64.c
new file mode 100644
index 000..1496573
--- /dev/null
+++ b/kexec/arch/x86_64/kexec-bzImage64.c
@@ -0,0 +1,304 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003-2010  Eric Biederman (ebied...@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define _GNU_SOURCE
+#include stddef.h
+#include stdio.h
+#include string.h
+#include limits.h
+#include stdlib.h
+#include errno.h
+#include sys/types.h
+#include sys/stat.h
+#include fcntl.h
+#include unistd.h
+#include getopt.h
+#include elf.h
+#include boot/elf_boot.h
+#include ip_checksum.h
+#include x86/x86-linux.h
+#include ../../kexec.h
+#include ../../kexec-elf.h
+#include ../../kexec-syscall.h
+#include kexec-x86_64.h
+#include ../i386/x86-linux-setup.h
+#include ../i386/crashdump-x86.h
+#include arch/options.h
+
+static const int probe_debug = 0;
+
+int bzImage64_probe(const char *buf, off_t len)
+{
+   const struct x86_linux_header *header;
+
+   if ((uintmax_t)len  (uintmax_t)(2 * 512)) {
+   if (probe_debug)
+   fprintf(stderr, File is too short to be a bzImage!\n);
+   return -1;
+   }
+   header = (const struct x86_linux_header *)buf;
+   if (memcmp(header-header_magic, HdrS, 4) != 0) {
+   if (probe_debug)
+   fprintf(stderr, Not a bzImage\n);
+   return -1;
+   }
+   if (header-boot_sector_magic != 0xAA55) {
+   if (probe_debug)
+   fprintf(stderr, No x86 boot sector present\n);
+   /* No x86 boot sector present */
+   return -1;
+   }
+   if (header-protocol_version  0x020C) {
+   if (probe_debug

[PATCH v9 1/7] kexec, x86: set booloader id in setup_header

2013-02-21 Thread Yinghai Lu
set LOADER_TYPE_KEXEC

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 include/x86/x86-linux.h   |1 +
 kexec/arch/i386/x86-linux-setup.c |2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index 8f7a797..ea11042 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -142,6 +142,7 @@ struct x86_linux_param_header {
 #define LOADER_TYPE_BOOTSECT_LOADER 2
 #define LOADER_TYPE_SYSLINUX3
 #define LOADER_TYPE_ETHERBOOT   4
+#define LOADER_TYPE_KEXEC   0x0D
 #define LOADER_TYPE_UNKNOWN 0xFF
uint8_t  loader_flags;  /* 0x211 */
uint8_t  reserved12[2]; /* 0x212 */
diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index ef62553..d09c6ce 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -56,7 +56,7 @@ void setup_linux_bootloader_parameters(
unsigned long initrd_base, initrd_addr_max;
 
/* Say I'm a boot loader */
-   real_mode-loader_type = LOADER_TYPE_UNKNOWN;
+   real_mode-loader_type = LOADER_TYPE_KEXEC  4;
 
/* No loader flags */
real_mode-loader_flags = 0;
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v9 0/7] kexec: put bzImage and ramdisk above 4G for x86 64bit

2013-02-21 Thread Yinghai Lu
Currently the kdump reserved region is limited to below 896M because of a
kexec limitation, and the bzImage also needs to stay below 4G.

The kernel-side changes have already been merged into Linus's tree.

These patches let kexec-tools load the bzImage and ramdisk above 4G
according to the newly added boot header fields.

This only loads high for a 64-bit kernel with a 2.12 setup header.

-v3: address review from Eric to use locate_hole at first.
 use xloadflags instead.
-v4: remove the restriction about bzImage not crossing GB boundary.
 add real-mode fix for bzImage.
 add --entry-32bit and --real-mode for skip bzImage64.
-v5: use USE_EXT_BOOT_PARAMS bit in xloadflags.
-v6: use sentinel instead of USE_EXT_BOOT_PARAMS.
 add crashkernel_low support
-v7: Separate bootloader id setting in another patch
-v8: update for CAN_BE_LOADED_ABOVE_4G is changed to bit1.
 also make it applicable to the current kexec-tools devel tree.
-v9: only copy setup_header when it is not with real_mode_entry.

Yinghai Lu (7):
  kexec, x86: set booloader id in setup_header
  kexec, x86: add boot header member for version 2.12
  kexec, x86: clean boot_params area for entry-32bit path
  kexec, x86: Fix bzImage real-mode booting
  kexec, x86: put ramdisk/cmd_line above 4G for 64bit bzImage
  kexec, x86_64: Load bzImage64 above 4G
  kexec, x86: handle Crash low kernel range

 include/x86/x86-linux.h|   27 ++-
 kexec/arch/i386/crashdump-x86.c|   21 +++
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/i386/kexec-bzImage.c|   76 ++--
 kexec/arch/i386/x86-linux-setup.c  |   38 ++--
 kexec/arch/i386/x86-linux-setup.h  |   15 +-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  304 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 10 files changed, 460 insertions(+), 32 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v9 5/7] kexec, x86: put ramdisk/cmd_line above 4G for 64bit bzImage

2013-02-21 Thread Yinghai Lu
We could put ramdisk/cmdline above 4G for bzImage on 64bit for protocol 2.12.

-v2: change ext_... handling to the way that Eric likes.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/x86-linux-setup.c |   36 +++-
 kexec/arch/i386/x86-linux-setup.h |   15 +--
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index d09c6ce..e0ddc84 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -46,11 +46,11 @@ void init_linux_parameters(struct x86_linux_param_header 
*real_mode)
real_mode-cmdline_size = COMMAND_LINE_SIZE;
 }
 
-void setup_linux_bootloader_parameters(
+void setup_linux_bootloader_parameters_high(
struct kexec_info *info, struct x86_linux_param_header *real_mode,
unsigned long real_mode_base, unsigned long cmdline_offset,
const char *cmdline, off_t cmdline_len,
-   const char *initrd_buf, off_t initrd_size)
+   const char *initrd_buf, off_t initrd_size, int initrd_high)
 {
char *cmdline_ptr;
unsigned long initrd_base, initrd_addr_max;
@@ -62,10 +62,15 @@ void setup_linux_bootloader_parameters(
real_mode-loader_flags = 0;
 
/* Find the maximum initial ramdisk address */
-   initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
-   if (real_mode-protocol_version = 0x0203) {
-   initrd_addr_max = real_mode-initrd_addr_max;
-   dbgprintf(initrd_addr_max is 0x%lx\n, initrd_addr_max);
+   if (initrd_high)
+   initrd_addr_max = ULONG_MAX;
+   else {
+   initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
+   if (real_mode-protocol_version = 0x0203) {
+   initrd_addr_max = real_mode-initrd_addr_max;
+   dbgprintf(initrd_addr_max is 0x%lx\n,
+initrd_addr_max);
+   }
}
 
/* Load the initrd if we have one */
@@ -81,8 +86,16 @@ void setup_linux_bootloader_parameters(
}
 
/* Ramdisk address and size */
-   real_mode-initrd_start = initrd_base;
-   real_mode-initrd_size  = initrd_size;
+   real_mode-initrd_start = initrd_base  0xUL;
+   real_mode-initrd_size  = initrd_size  0xUL;
+
+   if (real_mode-protocol_version = 0x020c 
+   (initrd_base  0xUL) != initrd_base)
+   real_mode-ext_ramdisk_image = initrd_base  32;
+
+   if (real_mode-protocol_version = 0x020c 
+   (initrd_size  0xUL) != initrd_size)
+   real_mode-ext_ramdisk_size = initrd_size  32;
 
/* The location of the command line */
/* if (real_mode_base == 0x9) { */
@@ -91,7 +104,12 @@ void setup_linux_bootloader_parameters(
/* setup_move_size */
/* } */
if (real_mode-protocol_version = 0x0202) {
-   real_mode-cmd_line_ptr = real_mode_base + cmdline_offset;
+   unsigned long cmd_line_ptr = real_mode_base + cmdline_offset;
+
+   real_mode-cmd_line_ptr = cmd_line_ptr  0xUL;
+   if ((real_mode-protocol_version = 0x020c) 
+   ((cmd_line_ptr  0xUL) != cmd_line_ptr))
+   real_mode-ext_cmd_line_ptr = cmd_line_ptr  32;
}
 
/* Fill in the command line */
diff --git a/kexec/arch/i386/x86-linux-setup.h 
b/kexec/arch/i386/x86-linux-setup.h
index 96fbd33..09aed4d 100644
--- a/kexec/arch/i386/x86-linux-setup.h
+++ b/kexec/arch/i386/x86-linux-setup.h
@@ -2,11 +2,22 @@
 #define X86_LINUX_SETUP_H
 
 void init_linux_parameters(struct x86_linux_param_header *real_mode);
-void setup_linux_bootloader_parameters(
+void setup_linux_bootloader_parameters_high(
struct kexec_info *info, struct x86_linux_param_header *real_mode,
unsigned long real_mode_base, unsigned long cmdline_offset,
const char *cmdline, off_t cmdline_len,
-   const char *initrd_buf, off_t initrd_size);
+   const char *initrd_buf, off_t initrd_size, int initrd_high);
+static inline void setup_linux_bootloader_parameters(
+   struct kexec_info *info, struct x86_linux_param_header *real_mode,
+   unsigned long real_mode_base, unsigned long cmdline_offset,
+   const char *cmdline, off_t cmdline_len,
+   const char *initrd_buf, off_t initrd_size)
+{
+   setup_linux_bootloader_parameters_high(info,
+   real_mode, real_mode_base,
+   cmdline_offset, cmdline, cmdline_len,
+   initrd_buf, initrd_size, 0);
+}
 void setup_linux_system_parameters(struct kexec_info *info,
struct x86_linux_param_header *real_mode);
 
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v9 2/7] kexec, x86: add boot header member for version 2.12

2013-02-21 Thread Yinghai Lu
Add the boot header members for protocol 2.12; ext_ramdisk_image/size and
xloadflags will be used to put the ramdisk and bzImage high on 64bit.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 include/x86/x86-linux.h |   26 +-
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index ea11042..0949dc2 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -32,7 +32,7 @@ struct drive_info_struct {
 };
 struct sys_desc_table {
uint16_t length;
-   uint8_t  table[318];
+   uint8_t  table[30];
 };
 
 struct apm_bios_info {
@@ -112,6 +112,10 @@ struct x86_linux_param_header {
struct apm_bios_info apm_bios_info; /* 0x40 */
struct drive_info_struct drive_info;/* 0x80 */
struct sys_desc_table sys_desc_table;   /* 0xa0 */
+   uint32_t ext_ramdisk_image; /* 0xc0 */
+   uint32_t ext_ramdisk_size;  /* 0xc4 */
+   uint32_t ext_cmd_line_ptr;  /* 0xc8 */
+   uint8_t reserved4_1[0x1e0 - 0xcc];  /* 0xcc */
uint32_t alt_mem_k; /* 0x1e0 */
uint8_t  reserved5[4];  /* 0x1e4 */
uint8_t  e820_map_nr;   /* 0x1e8 */
@@ -175,11 +179,18 @@ struct x86_linux_param_header {
/* 2.04+ */
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved15[3]; /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
-   uint8_t  reserved16[0x290 - 0x248]; /* 0x248 */
+   uint32_t payload_offset;/* 0x248 */
+   uint32_t payload_length;/* 0x24C */
+   uint64_t setup_data;/* 0x250 */
+   uint64_t pref_address;  /* 0x258 */
+   uint32_t init_size; /* 0x260 */
+   uint32_t handover_offset;   /* 0x264 */
+   uint8_t  reserved16[0x290 - 0x268]; /* 0x268 */
uint32_t edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];   /* 0x290 */
 #endif
struct  e820entry e820_map[E820MAX];/* 0x2d0 */
@@ -196,7 +207,11 @@ struct x86_linux_faked_param_header {
 };
 
 struct x86_linux_header {
-   uint8_t  reserved1[0x1f1];  /* 0x000 */
+   uint8_t  reserved1[0xc0];   /* 0x000 */
+   uint32_t ext_ramdisk_image; /* 0x0c0 */
+   uint32_t ext_ramdisk_size;  /* 0x0c4 */
+   uint32_t ext_cmd_line_ptr;  /* 0x0c8 */
+   uint8_t  reserved1_1[0x1f1-0xcc];   /* 0x0cc */
uint8_t  setup_sects;   /* 0x1f1 */
uint16_t root_flags;/* 0x1f2 */
uint32_t syssize;   /* 0x1f4 */
@@ -229,7 +244,8 @@ struct x86_linux_header {
 
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved6[3];  /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v9 3/7] kexec, x86: clean boot_params area for entry-32bit path

2013-02-21 Thread Yinghai Lu
The kexec bzImage path shares its setup data with the real-mode path, so
setup_header is copied together with the setup code.
The 32-bit entry path later uses the whole area as boot_params
(real_mode_data), but the parts of boot_params around setup_header are
never cleared, which leaves some boot_params fields with non-zero
values.

So clear the whole buffer first, and copy only setup_header for the
non-real-mode entry path.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c |   14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index 83a023d..35005c7 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -211,7 +211,19 @@ int do_bzImage_load(struct kexec_info *info,
/* The argument/parameter segment */
setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
-   memcpy(real_mode, kernel, kern16_size);
+   memset(real_mode, 0, setup_size);
+   if (!real_mode_entry) {
+   unsigned long setup_header_size = kernel[0x201] + 0x202 - 0x1f1;
+
+   /* only copy setup_header */
+   if (setup_header_size  0x7f)
+   setup_header_size = 0x7f;
+   memcpy((unsigned char *)real_mode + 0x1f1, kernel + 0x1f1,
+   setup_header_size);
+   } else {
+   /* copy setup code and setup_header */
+   memcpy(real_mode, kernel, kern16_size);
+   }
 
if (info-kexec_flags  (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
/* If using bzImage for capture kernel, then we will not be
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v7 0/7] kexec: put bzImage and ramdisk above 4G for x86 64bit

2013-02-21 Thread Yinghai Lu
On Sun, Jan 27, 2013 at 6:39 PM, Yinghai Lu <ying...@kernel.org> wrote:
> On Sun, Jan 27, 2013 at 4:49 PM, Simon Horman <ho...@verge.net.au> wrote:
>>> Do you want to wait those kexec-tools patches hit tip tree or Linus tree?
>>
>> Yes, if you could ping me when they hit Linus's tree that would be great.
>
> ok, will resend them after it hit Linus's tree.

Hi, Simon,

The kernel-side changes were merged into Linus' tree today via hpa.

I have just resent the kexec-tools parts, with v9 in the subject.

Please check them.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/3] Cleanup kdump memmap= passing and e820 usage

2013-01-30 Thread Yinghai Lu
On Wed, Jan 30, 2013 at 8:52 AM, H. Peter Anvin h...@zytor.com wrote:
 The e820 map is fundamentally what you care about, and it has to be passed
 correctly anyway -- or your changes are utterly broken.  The modifications
 that have to be performed (from RAM to KDUMP) is trivial.

 I have to admit to being rather confused as to the separation of various
 bits of kdump between the host kernel and various user-space components, but
 the whole use of the command line to pass the memory map seems just broken
 in light of everything that can go wrong.

Thomas,

Can you try to work out a patch for kexec-tools so that kdump changes all
RAM to KDUMP_RESERVED, and only makes the crash kernel range in /proc/iomem
RAM type?

Then we only need a kernel patch for kdump that checks the KDUMP_RESERVED and
RAM types to get saved_max_pfn.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v8 1/7] kexec, x86: set bootloader id in setup_header

2013-01-30 Thread Yinghai Lu
set LOADER_TYPE_KEXEC

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 include/x86/x86-linux.h   |1 +
 kexec/arch/i386/x86-linux-setup.c |2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index 8f7a797..ea11042 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -142,6 +142,7 @@ struct x86_linux_param_header {
 #define LOADER_TYPE_BOOTSECT_LOADER 2
 #define LOADER_TYPE_SYSLINUX3
 #define LOADER_TYPE_ETHERBOOT   4
+#define LOADER_TYPE_KEXEC   0x0D
 #define LOADER_TYPE_UNKNOWN 0xFF
uint8_t  loader_flags;  /* 0x211 */
uint8_t  reserved12[2]; /* 0x212 */
diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index ef62553..d09c6ce 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -56,7 +56,7 @@ void setup_linux_bootloader_parameters(
unsigned long initrd_base, initrd_addr_max;
 
/* Say I'm a boot loader */
-   real_mode-loader_type = LOADER_TYPE_UNKNOWN;
+   real_mode-loader_type = LOADER_TYPE_KEXEC  4;
 
/* No loader flags */
real_mode-loader_flags = 0;
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v8 4/7] kexec, x86: Fix bzImage real-mode booting

2013-01-30 Thread Yinghai Lu
We need to keep space for bss and heap/stack before the command line;
otherwise the command line will be cleared by the kernel's 16-bit init code.

We also need to set the 32-bit start address in the real-mode header, because
the kernel's 16-bit code needs to jump there.

Also, don't touch regs16 if --real-mode is not specified.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c |   63 +++
 1 file changed, 50 insertions(+), 13 deletions(-)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index 1dfa3d7..add0646 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -130,6 +130,8 @@ int do_bzImage_load(struct kexec_info *info,
unsigned long kernel32_load_addr;
char *modified_cmdline;
unsigned long cmdline_end;
+   unsigned long kern16_size_needed;
+   unsigned long heap_size = 0;
 
/*
 * Find out about the file I am about to load.
@@ -221,9 +223,31 @@ int do_bzImage_load(struct kexec_info *info,
elf_rel_build_load(info, info-rhdr, purgatory, purgatory_size,
0x3000, 640*1024, -1, 0);
dbgprintf(Loaded purgatory at addr 0x%lx\n, info-rhdr.rel_addr);
+
/* The argument/parameter segment */
-   setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
+   if (real_mode_entry) {
+   /* need to include size for bss and heap etc */
+   if (setup_header.protocol_version = 0x0201)
+   kern16_size_needed = setup_header.heap_end_ptr;
+   else
+   kern16_size_needed = kern16_size + 8192; /* bss */
+   if (kern16_size_needed  kern16_size)
+   kern16_size_needed = kern16_size;
+   if (kern16_size_needed  0xfffc)
+   die(kern16_size_needed is more then 64k\n);
+   heap_size = 0xfffc - kern16_size_needed; /* less 64k */
+   heap_size = ~(0x200 - 1);
+   kern16_size_needed += heap_size;
+   } else {
+   kern16_size_needed = kern16_size;
+   /* need to bigger than size of struct bootparams */
+   if (kern16_size_needed  4096)
+   kern16_size_needed = 4096;
+   }
+   setup_size = kern16_size_needed + command_line_len +
+PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
+   memset(real_mode, 0, setup_size);
memcpy(real_mode, kernel, kern16_size);
if (!real_mode_entry)
clean_boot_params((unsigned char *)real_mode, kern16_size);
@@ -278,11 +302,18 @@ int do_bzImage_load(struct kexec_info *info,
 
/* Tell the kernel what is going on */
setup_linux_bootloader_parameters(info, real_mode, setup_base,
-   kern16_size, command_line, command_line_len,
+   kern16_size_needed, command_line, command_line_len,
initrd, initrd_len);
 
+   if (real_mode_entry  real_mode-protocol_version = 0x0201) {
+   real_mode-loader_flags |= 0x80; /* CAN_USE_HEAP */
+   real_mode-heap_end_ptr += heap_size - 0x200; /*stack*/
+   }
+
/* Get the initial register values */
-   elf_rel_get_symbol(info-rhdr, entry16_regs, regs16, 
sizeof(regs16));
+   if (real_mode_entry)
+   elf_rel_get_symbol(info-rhdr, entry16_regs,
+regs16, sizeof(regs16));
elf_rel_get_symbol(info-rhdr, entry32_regs, regs32, 
sizeof(regs32));
/*
 
@@ -301,16 +332,18 @@ int do_bzImage_load(struct kexec_info *info,
/*
 * Initialize the 16bit start information.
 */
-   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base  4;
-   regs16.cs = regs16.ds + 0x20;
-   regs16.ip = 0;
-   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
-   regs16.ss = (elf_rel_get_addr(info-rhdr, stack_end) - 64*1024)  4;
-   /* XXX: Documentation/i386/boot.txt says 'sp' must equal heap_end */
-   regs16.esp = 0xFFFC;
if (real_mode_entry) {
+   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base  4;
+   regs16.cs = regs16.ds + 0x20;
+   regs16.ip = 0;
+   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
+   regs16.ss = (elf_rel_get_addr(info-rhdr, stack_end) - 
64*1024)  4;
+   /* XXX: Documentation/i386/boot.txt says 'sp' must equal 
heap_end */
+   regs16.esp = 0xFFFC;
+
printf(Starting the kernel in real mode\n);
regs32.eip = elf_rel_get_addr(info-rhdr, entry16);
+   real_mode-kernel_start = kernel32_load_addr;
}
if (real_mode_entry  kexec_debug) {
unsigned long entry16_debug, pre32, first32;
@@ -330,10 +363,14 @@ int do_bzImage_load(struct kexec_info *info

[PATCH v8 3/7] kexec, x86: clean boot_params area for entry-32bit path

2013-01-30 Thread Yinghai Lu
The kexec bzImage path shares its setup data with the real-mode path, so
setup_header is copied together with the setup code.
The 32-bit entry path later uses the whole area as boot_params
(real_mode_data), but the parts of boot_params around setup_header are
never cleared, which leaves some boot_params fields with non-zero
values.

So clear the area around setup_header for the non-real-mode entry path.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c |   15 +++
 1 file changed, 15 insertions(+)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index 83a023d..1dfa3d7 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -98,6 +98,19 @@ void bzImage_usage(void)

 }
 
+static void clean_boot_params(unsigned char *real_mode, unsigned long size)
+{
+   unsigned long end;
+
+   /* clear value before header */
+   memset(real_mode, 0, 0x1f1);
+   /* clear value after setup_header  */
+   end = *(real_mode + 0x201);
+   end += 0x202;
+   if (end  size)
+   memset(real_mode + end, 0, size - end);
+}
+
 int do_bzImage_load(struct kexec_info *info,
const char *kernel, off_t kernel_len,
const char *command_line, off_t command_line_len,
@@ -212,6 +225,8 @@ int do_bzImage_load(struct kexec_info *info,
setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
memcpy(real_mode, kernel, kern16_size);
+   if (!real_mode_entry)
+   clean_boot_params((unsigned char *)real_mode, kern16_size);
 
if (info-kexec_flags  (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
/* If using bzImage for capture kernel, then we will not be
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v8 0/7] kexec: put bzImage and ramdisk above 4G for x86 64bit

2013-01-30 Thread Yinghai Lu
Currently the kdump reservation is limited to below 896M because of a kexec
limitation, and the bzImage also needs to stay below 4G.

The kernel-side changes are in the tip tree now and can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm2

These patches let kexec-tools load the bzImage and ramdisk above 4G
according to the newly added boot header fields.

High loading is only done for a 64-bit kernel with a 2.12 setup header.

-v3: address review from Eric to use locate_hole at first.
 use xloadflags instead.
-v4: remove the restriction about bzImage not crossing GB boundary.
 add real-mode fix for bzImage.
 add --entry-32bit and --real-mode for skip bzImage64.
-v5: use USE_EXT_BOOT_PARAMS bit in xloadflags.
-v6: use sentinel instead of USE_EXT_BOOT_PARAMS.
 add crashkernel_low support
-v7: Separate bootloader id setting in another patch
-v8: update for CAN_BE_LOADED_ABOVE_4G being changed to bit 1.
 also make it applicable to current kexec-tools devel tree.

Yinghai Lu (7):
  kexec, x86: set bootloader id in setup_header
  kexec, x86: add boot header member for version 2.12
  kexec, x86: clean boot_params area for entry-32bit path
  kexec, x86: Fix bzImage real-mode booting
  kexec, x86: put ramdisk/cmd_line above 4G for 64bit bzImage
  kexec, x86_64: Load bzImage64 above 4G
  kexec, x86: handle Crash low kernel range

 include/x86/x86-linux.h|   27 ++-
 kexec/arch/i386/crashdump-x86.c|   21 +++
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/i386/kexec-bzImage.c|   78 ++--
 kexec/arch/i386/x86-linux-setup.c  |   38 +++-
 kexec/arch/i386/x86-linux-setup.h  |   15 +-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  312 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 10 files changed, 471 insertions(+), 31 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v8 2/7] kexec, x86: add boot header member for version 2.12

2013-01-30 Thread Yinghai Lu
The new fields ext_ramdisk_image/size and xloadflags will be used to put
the ramdisk and bzImage high for 64-bit.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 include/x86/x86-linux.h |   26 +-
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index ea11042..0949dc2 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -32,7 +32,7 @@ struct drive_info_struct {
 };
 struct sys_desc_table {
uint16_t length;
-   uint8_t  table[318];
+   uint8_t  table[30];
 };
 
 struct apm_bios_info {
@@ -112,6 +112,10 @@ struct x86_linux_param_header {
struct apm_bios_info apm_bios_info; /* 0x40 */
struct drive_info_struct drive_info;/* 0x80 */
struct sys_desc_table sys_desc_table;   /* 0xa0 */
+   uint32_t ext_ramdisk_image; /* 0xc0 */
+   uint32_t ext_ramdisk_size;  /* 0xc4 */
+   uint32_t ext_cmd_line_ptr;  /* 0xc8 */
+   uint8_t reserved4_1[0x1e0 - 0xcc];  /* 0xcc */
uint32_t alt_mem_k; /* 0x1e0 */
uint8_t  reserved5[4];  /* 0x1e4 */
uint8_t  e820_map_nr;   /* 0x1e8 */
@@ -175,11 +179,18 @@ struct x86_linux_param_header {
/* 2.04+ */
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved15[3]; /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
-   uint8_t  reserved16[0x290 - 0x248]; /* 0x248 */
+   uint32_t payload_offset;/* 0x248 */
+   uint32_t payload_length;/* 0x24C */
+   uint64_t setup_data;/* 0x250 */
+   uint64_t pref_address;  /* 0x258 */
+   uint32_t init_size; /* 0x260 */
+   uint32_t handover_offset;   /* 0x264 */
+   uint8_t  reserved16[0x290 - 0x268]; /* 0x268 */
uint32_t edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];   /* 0x290 */
 #endif
struct  e820entry e820_map[E820MAX];/* 0x2d0 */
@@ -196,7 +207,11 @@ struct x86_linux_faked_param_header {
 };
 
 struct x86_linux_header {
-   uint8_t  reserved1[0x1f1];  /* 0x000 */
+   uint8_t  reserved1[0xc0];   /* 0x000 */
+   uint32_t ext_ramdisk_image; /* 0x0c0 */
+   uint32_t ext_ramdisk_size;  /* 0x0c4 */
+   uint32_t ext_cmd_line_ptr;  /* 0x0c8 */
+   uint8_t  reserved1_1[0x1f1-0xcc];   /* 0x0cc */
uint8_t  setup_sects;   /* 0x1f1 */
uint16_t root_flags;/* 0x1f2 */
uint32_t syssize;   /* 0x1f4 */
@@ -229,7 +244,8 @@ struct x86_linux_header {
 
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved6[3];  /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v8 7/7] kexec, x86: handle Crash low kernel range

2013-01-30 Thread Yinghai Lu
The kernel may expose a "Crash kernel low" range in /proc/iomem; use it for
the kdump kernel so that it has memory below 4G for dma32.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/crashdump-x86.c |   21 +
 1 file changed, 21 insertions(+)

diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c
index 245402c..83bff5e 100644
--- a/kexec/arch/i386/crashdump-x86.c
+++ b/kexec/arch/i386/crashdump-x86.c
@@ -188,6 +188,8 @@ static struct memory_range 
crash_memory_range[CRASH_MAX_MEMORY_RANGES];
 
 /* Memory region reserved for storing panic kernel and other data. */
 static struct memory_range crash_reserved_mem;
+/* under 4G parts */
+static struct memory_range crash_reserved_low_mem;
 
 /* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to
  * create Elf headers. Keeping it separate from get_memory_ranges() as
@@ -282,6 +284,10 @@ static int get_crash_memory_ranges(struct memory_range 
**range, int *ranges,
if (exclude_region(memory_ranges, crash_reserved_mem.start,
crash_reserved_mem.end)  0)
return -1;
+   if (crash_reserved_low_mem.start 
+   exclude_region(memory_ranges, crash_reserved_low_mem.start,
+   crash_reserved_low_mem.end)  0)
+   return -1;
if (gart) {
/* exclude GART region if the system has one */
if (exclude_region(memory_ranges, gart_start, gart_end)  0)
@@ -984,6 +990,12 @@ int load_crashdump_segments(struct kexec_info *info, char* 
mod_cmdline,
return ENOCRASHKERNEL;
}
 
+   if (crash_reserved_low_mem.start) {
+   sz = crash_reserved_low_mem.end - crash_reserved_low_mem.start
++1;
+   add_memmap(memmap_p, crash_reserved_low_mem.start, sz);
+   }
+
/* Create a backup region segment to store backup data*/
if (!(info-kexec_flags  KEXEC_PRESERVE_CONTEXT)) {
sz = (info-backup_src_size + align)  ~(align - 1);
@@ -1059,5 +1071,14 @@ int is_crashkernel_mem_reserved(void)
crash_reserved_mem.end = end;
crash_reserved_mem.type = RANGE_RAM;
 
+   /* If there is no Crash low kernel, still can go on */
+   if (parse_iomem_single(Crash kernel low\n, start, end) ||
+   start == end)
+   return 1;
+
+   crash_reserved_low_mem.start = start;
+   crash_reserved_low_mem.end = end;
+   crash_reserved_low_mem.type = RANGE_RAM;
+
return 1;
 }
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v8 6/7] kexec, x86_64: Load bzImage64 above 4G

2013-01-30 Thread Yinghai Lu
need to check xloadflags to see the bzImage is for 64bit relocatable.

-v2: add kexec-bzImage64.c according to Eric.
-v3: don't need to keep purgatory under 2G after Eric's change to purgatory code.
-v4: use locate_hole find position first then add_buffer... suggested by Eric
 add buffer for kernel image at last to make kexec-load faster.
 use xloadflags in setup_header to tell if is bzImage64.
 remove not cross GB boundary searching.
 add --entry-32bit and --real-mode for skipping bzImage64.
-v5: add buffer with runtime size instead, so kernel could use BRK
 early and safely.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  312 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 5 files changed, 322 insertions(+), 1 deletion(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

diff --git a/kexec/arch/i386/include/arch/options.h 
b/kexec/arch/i386/include/arch/options.h
index 89dbd26..aaac731 100644
--- a/kexec/arch/i386/include/arch/options.h
+++ b/kexec/arch/i386/include/arch/options.h
@@ -29,6 +29,7 @@
 #define OPT_MOD(OPT_ARCH_MAX+7)
 #define OPT_VGA(OPT_ARCH_MAX+8)
 #define OPT_REAL_MODE  (OPT_ARCH_MAX+9)
+#define OPT_ENTRY_32BIT(OPT_ARCH_MAX+10)
 
 /* Options relevant to the architecture (excluding loader-specific ones): */
 #define KEXEC_ARCH_OPTIONS \
@@ -68,7 +69,8 @@
{ args-linux, 0, NULL, OPT_ARGS_LINUX },  \
{ args-none,  0, NULL, OPT_ARGS_NONE },   \
{ module, 1, 0, OPT_MOD },\
-   { real-mode,  0, NULL, OPT_REAL_MODE },
+   { real-mode,  0, NULL, OPT_REAL_MODE },   \
+   { entry-32bit,0, NULL, OPT_ENTRY_32BIT },
 
 #define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR
 
diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile
index 405bdf5..1cf10f9 100644
--- a/kexec/arch/x86_64/Makefile
+++ b/kexec/arch/x86_64/Makefile
@@ -13,6 +13,7 @@ x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c
 x86_64_KEXEC_SRCS_native =  kexec/arch/x86_64/kexec-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c
+x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c
 
 x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native)
 
diff --git a/kexec/arch/x86_64/kexec-bzImage64.c 
b/kexec/arch/x86_64/kexec-bzImage64.c
new file mode 100644
index 000..0075f28
--- /dev/null
+++ b/kexec/arch/x86_64/kexec-bzImage64.c
@@ -0,0 +1,312 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003-2010  Eric Biederman (ebied...@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define _GNU_SOURCE
+#include stddef.h
+#include stdio.h
+#include string.h
+#include limits.h
+#include stdlib.h
+#include errno.h
+#include sys/types.h
+#include sys/stat.h
+#include fcntl.h
+#include unistd.h
+#include getopt.h
+#include elf.h
+#include boot/elf_boot.h
+#include ip_checksum.h
+#include x86/x86-linux.h
+#include ../../kexec.h
+#include ../../kexec-elf.h
+#include ../../kexec-syscall.h
+#include kexec-x86_64.h
+#include ../i386/x86-linux-setup.h
+#include ../i386/crashdump-x86.h
+#include arch/options.h
+
+static const int probe_debug = 0;
+
+int bzImage64_probe(const char *buf, off_t len)
+{
+   const struct x86_linux_header *header;
+
+   if ((uintmax_t)len  (uintmax_t)(2 * 512)) {
+   if (probe_debug)
+   fprintf(stderr, File is too short to be a bzImage!\n);
+   return -1;
+   }
+   header = (const struct x86_linux_header *)buf;
+   if (memcmp(header-header_magic, HdrS, 4) != 0) {
+   if (probe_debug)
+   fprintf(stderr, Not a bzImage\n);
+   return -1;
+   }
+   if (header-boot_sector_magic != 0xAA55) {
+   if (probe_debug)
+   fprintf(stderr, No x86 boot sector present\n);
+   /* No x86 boot sector present */
+   return -1;
+   }
+   if (header-protocol_version  0x020C) {
+   if (probe_debug

[PATCH v8 5/7] kexec, x86: put ramdisk/cmd_line above 4G for 64bit bzImage

2013-01-30 Thread Yinghai Lu
We can put the ramdisk and command line above 4G for a 64-bit bzImage with
boot protocol 2.12.

-v2: change ext_... handling to the way that Eric likes.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/x86-linux-setup.c |   36 +++-
 kexec/arch/i386/x86-linux-setup.h |   15 +--
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index d09c6ce..e0ddc84 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -46,11 +46,11 @@ void init_linux_parameters(struct x86_linux_param_header 
*real_mode)
real_mode-cmdline_size = COMMAND_LINE_SIZE;
 }
 
-void setup_linux_bootloader_parameters(
+void setup_linux_bootloader_parameters_high(
struct kexec_info *info, struct x86_linux_param_header *real_mode,
unsigned long real_mode_base, unsigned long cmdline_offset,
const char *cmdline, off_t cmdline_len,
-   const char *initrd_buf, off_t initrd_size)
+   const char *initrd_buf, off_t initrd_size, int initrd_high)
 {
char *cmdline_ptr;
unsigned long initrd_base, initrd_addr_max;
@@ -62,10 +62,15 @@ void setup_linux_bootloader_parameters(
real_mode-loader_flags = 0;
 
/* Find the maximum initial ramdisk address */
-   initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
-   if (real_mode-protocol_version = 0x0203) {
-   initrd_addr_max = real_mode-initrd_addr_max;
-   dbgprintf(initrd_addr_max is 0x%lx\n, initrd_addr_max);
+   if (initrd_high)
+   initrd_addr_max = ULONG_MAX;
+   else {
+   initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
+   if (real_mode-protocol_version = 0x0203) {
+   initrd_addr_max = real_mode-initrd_addr_max;
+   dbgprintf(initrd_addr_max is 0x%lx\n,
+initrd_addr_max);
+   }
}
 
/* Load the initrd if we have one */
@@ -81,8 +86,16 @@ void setup_linux_bootloader_parameters(
}
 
/* Ramdisk address and size */
-   real_mode-initrd_start = initrd_base;
-   real_mode-initrd_size  = initrd_size;
+   real_mode-initrd_start = initrd_base  0xUL;
+   real_mode-initrd_size  = initrd_size  0xUL;
+
+   if (real_mode-protocol_version = 0x020c 
+   (initrd_base  0xUL) != initrd_base)
+   real_mode-ext_ramdisk_image = initrd_base  32;
+
+   if (real_mode-protocol_version = 0x020c 
+   (initrd_size  0xUL) != initrd_size)
+   real_mode-ext_ramdisk_size = initrd_size  32;
 
/* The location of the command line */
/* if (real_mode_base == 0x9) { */
@@ -91,7 +104,12 @@ void setup_linux_bootloader_parameters(
/* setup_move_size */
/* } */
if (real_mode-protocol_version = 0x0202) {
-   real_mode-cmd_line_ptr = real_mode_base + cmdline_offset;
+   unsigned long cmd_line_ptr = real_mode_base + cmdline_offset;
+
+   real_mode-cmd_line_ptr = cmd_line_ptr  0xUL;
+   if ((real_mode-protocol_version = 0x020c) 
+   ((cmd_line_ptr  0xUL) != cmd_line_ptr))
+   real_mode-ext_cmd_line_ptr = cmd_line_ptr  32;
}
 
/* Fill in the command line */
diff --git a/kexec/arch/i386/x86-linux-setup.h 
b/kexec/arch/i386/x86-linux-setup.h
index 96fbd33..09aed4d 100644
--- a/kexec/arch/i386/x86-linux-setup.h
+++ b/kexec/arch/i386/x86-linux-setup.h
@@ -2,11 +2,22 @@
 #define X86_LINUX_SETUP_H
 
 void init_linux_parameters(struct x86_linux_param_header *real_mode);
-void setup_linux_bootloader_parameters(
+void setup_linux_bootloader_parameters_high(
struct kexec_info *info, struct x86_linux_param_header *real_mode,
unsigned long real_mode_base, unsigned long cmdline_offset,
const char *cmdline, off_t cmdline_len,
-   const char *initrd_buf, off_t initrd_size);
+   const char *initrd_buf, off_t initrd_size, int initrd_high);
+static inline void setup_linux_bootloader_parameters(
+   struct kexec_info *info, struct x86_linux_param_header *real_mode,
+   unsigned long real_mode_base, unsigned long cmdline_offset,
+   const char *cmdline, off_t cmdline_len,
+   const char *initrd_buf, off_t initrd_size)
+{
+   setup_linux_bootloader_parameters_high(info,
+   real_mode, real_mode_base,
+   cmdline_offset, cmdline, cmdline_len,
+   initrd_buf, initrd_size, 0);
+}
 void setup_linux_system_parameters(struct kexec_info *info,
struct x86_linux_param_header *real_mode);
 
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v8 3/7] kexec, x86: clean boot_params area for entry-32bit path

2013-01-30 Thread Yinghai Lu
On Wed, Jan 30, 2013 at 1:34 PM, H. Peter Anvin h...@zytor.com wrote:
 On 01/30/2013 01:25 PM, Yinghai Lu wrote:

 +static void clean_boot_params(unsigned char *real_mode, unsigned long size)
 +{
 + unsigned long end;
 +
 + /* clear value before header */
 + memset(real_mode, 0, 0x1f1);
 + /* clear value after setup_header  */
 + end = *(real_mode + 0x201);

 real_mode[0x201] might be clearer...

 + end += 0x202;
 + if (end  size)
 + memset(real_mode + end, 0, size - end);
 +}

 You don't actually need the test... the value is inherently smaller than
 0x301 which is less than the size.

 That being said, if you want to sanity-check it you can check that the
 value is in a sensible range -- the permitted range is 0x22 to 0x7f
 inclusive, corresponding to a total end value of 0x224 to 0x281.

yes.

how about clear all and copy only setup_header?

that looks more readable.

Index: kexec-tools/kexec/arch/i386/kexec-bzImage.c
===
--- kexec-tools.orig/kexec/arch/i386/kexec-bzImage.c
+++ kexec-tools/kexec/arch/i386/kexec-bzImage.c
@@ -211,7 +211,16 @@ int do_bzImage_load(struct kexec_info *i
/* The argument/parameter segment */
setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
-   memcpy(real_mode, kernel, kern16_size);
+   if (!real_mode_entry) {
+   unsigned long size = kernel[0x201] + 0x202 - 0x1f1;
+
+   /* only copy setup_header */
+   memset(real_mode, 0, setup_size);
+   if (size > 0x7f)
+   size = 0x7f;
+   memcpy(real_mode + 0x1f1, kernel + 0x1f1, size);
+   } else
+   memcpy(real_mode, kernel, kern16_size);

if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
/* If using bzImage for capture kernel, then we will not be

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/3] Cleanup kdump memmap= passing and e820 usage

2013-01-30 Thread Yinghai Lu
On Wed, Jan 30, 2013 at 2:41 PM, H. Peter Anvin h...@zytor.com wrote:
 On 01/30/2013 02:29 PM, Eric W. Biederman wrote:
 The bigger question is if we need a separate value from the current
 E820_RESERVED_KERN.  Since it is always easier to have multiple values
 with the same semantics than it is to have too few, I would still prefer
 we added a new E820_RESERVED_KDUMP, which would then be 21.

Currently, E820_RESERVED_KERN is treated as E820_RAM while filling memblock.memory,
and memblock.reserved already has entries
for those ranges.

For E820_RESERVED_KDUMP, it looks like the only use is for the kernel to find saved_max_pfn.

So we may have to keep them separate.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/2] x86 e820: Check for exactmap appearance when parsing first memmap option

2013-01-28 Thread Yinghai Lu
On Mon, Jan 28, 2013 at 5:09 PM, H. Peter Anvin h...@zytor.com wrote:
 On 01/22/2013 07:20 AM, Thomas Renninger wrote:
 From: Yinghai Lu ying...@kernel.org

 memmap=exactmap will throw away all original, but also until then
 user defined (through other provided memmap= parameters) areas.
 That means all memmap= boot parameters passed before a memmap=exactmap
 parameter are not recognized.
 Without this fix:
 memmap=x@y memmap=exactmap memmap=i#k
 only i#k would get recognized.

 This is wrong, this fix will only throw away all original e820 areas once
 when memmap=exactmap is found in the whole boot command line and before
 any other memmap= option is parsed.


 I don't understand why this is wrong.  The kernel command line is always
 parsed from left to right, and I don't see anything inherently
 problematic with that with something like a big hammer like exactmap.

Ok, let's drop it.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 2/2] x86 e820: Introduce memmap=resetusablemap for kdump usage

2013-01-28 Thread Yinghai Lu
On Mon, Jan 28, 2013 at 5:11 PM, H. Peter Anvin h...@zytor.com wrote:
 So I guess the final patch should be:
- Add a new e820 type:
 E820_KDUMP_RESERVED /* Originally usable memory where the crashed
 kernel resided in 
 */
   - Use Yinghai's last posted patch, but instead of:
 + e820_update_range(0, ULLONG_MAX, E820_RAM,
 +   E820_RESERVED);
 ...
 + e820_remove_range(start_at, mem_size, E820_RESERVED, 
 0);
 do:
 + e820_update_range(0, ULLONG_MAX, E820_RAM,
 +   E820_KDUMP_RESERVED);
 ...
 + e820_remove_range(start_at, mem_size, 
 E820_KDUMP_RESERVED, 0);

   - Come up with another memmap=kdump_reserve_ram memmap option name
 or however it should get named...

 If this proposal gets accepted, I can send a tested patch...


 Yes, this is much saner.  There really shouldn't need to be an option,
 even; since the tools need to be modified anyway, just modify the actual
 memory map data structure itself.

yes,

kexec-tools will change that to E820_KDUMP_RESERVED (or other good name).

We only need to update kernel to get old max_pfn by
checking E820_KDUMP_RESERVED.

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 2/2] x86 e820: Introduce memmap=resetusablemap for kdump usage

2013-01-28 Thread Yinghai Lu
On Mon, Jan 28, 2013 at 6:11 PM, H. Peter Anvin h...@zytor.com wrote:
 On 01/28/2013 06:10 PM, Yinghai Lu wrote:


 kexec-tools will change that to E820_KDUMP_RESERVED (or other good name).

 We only need to update kernel to get old max_pfn by
 checking E820_KDUMP_RESERVED.


 OK, I have asked this before, but I still have not gotten any acceptable
 answer:

 Why do we still have max_*_pfn at all?  Shouldn't it all be based on
 memblocks by now?

saved_max_pfn is used for kdump:
drivers/char/mem.c::read_oldmem will stop there.
...
while (count) {
pfn = *ppos / PAGE_SIZE;
if (pfn > saved_max_pfn)
return read;
...

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 2/2] x86 e820: Introduce memmap=resetusablemap for kdump usage

2013-01-28 Thread Yinghai Lu
On Mon, Jan 28, 2013 at 6:27 PM, H. Peter Anvin h...@zytor.com wrote:
 To be more clear: the max_pfn stuff seems like a relic of the past, and I am 
 wondering what it would take to get rid of it.

 It clearly has the wrong semantics, except perhaps in the most trivial 
 allocator models.

One thing I think it could be used for: deciding whether we need iommu/swiotlb,
like in
arch/x86/kernel/amd_gart_64.c
arch/x86/kernel/pci-swiotlb.c
drivers/iommu/amd_iommu.c
...

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v7 0/7] kexec: put bzImage and ramdisk above 4G for x86 64bit

2013-01-27 Thread Yinghai Lu
On Sun, Jan 27, 2013 at 4:49 PM, Simon Horman ho...@verge.net.au wrote:
 Do you want to wait those kexec-tools patches hit tip tree or Linus tree?

 Yes, if you could ping me when they hit Linus's tree that would be great.

ok, will resend them after it hit Linus's tree.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v7 0/7] kexec: put bzImage and ramdisk above 4G for x86 64bit

2013-01-25 Thread Yinghai Lu
On Fri, Jan 25, 2013 at 1:08 AM, Simon Horman ho...@verge.net.au wrote:
 On Thu, Jan 24, 2013 at 12:44:17PM -0800, Yinghai Lu wrote:
 Now we have limit kdump reserved under 896M, because kexec has the 
 limitation.
 and also bzImage need to stay under 4g.

 kernel parts changes could be found at:
 
 git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git 
 for-x86-boot

 here patches are for kexec tools to load bzImage and ramdisk above 4G
 according to the newly added boot header fields.

 -v3: address review from Eric to use locate_hole at first.
  use xloadflags instead.
 -v4: remove the restriction about bzImage not crossing GB boundary.
  add real-mode fix for bzImage.
  add --entry-32bit and --real-mode for skip bzImage64.
 -v5: use USE_EXT_BOOT_PARAMS bit in xloadflags.
 -v6: use sentinel instead of USE_EXT_BOOT_PARAMS.
  add crashkernel_low support
 -v7: Separate bootloader id setting in another patch

 Hi Yinghai, Hi All,

 my current thinking with regards to this is that I would like to take
 these changes into the kexec-tools tree once there is consensus and
 kernel portions have been accepted. If that makes sense please let me know
 when those conditions have been met. Otherwise lets discuss a different
 strategy.

Peter said he will put kernel related patches in tip soon with some
edits from him.

Do you want to wait those kexec-tools patches hit tip tree or Linus tree?

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v7 4/7] kexec, x86: put ramdisk/cmd_line above 4G for 64bit bzImage

2013-01-24 Thread Yinghai Lu
We could put ramdisk/cmdline above 4G for a 64-bit bzImage with boot protocol 2.12.

-v2: change ext_... handling to the way that Eric likes.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/x86-linux-setup.c |   25 +
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index b7ab8ea..eb8b794 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -64,7 +64,11 @@ void setup_linux_bootloader_parameters(
/* Find the maximum initial ramdisk address */
initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
if (real_mode->protocol_version >= 0x0203) {
-   initrd_addr_max = real_mode->initrd_addr_max;
+   if (real_mode->protocol_version >= 0x020c &&
+   real_mode->xloadflags & (1<<0)) /* CAN_BE_LOADED_ABOVE_4G */
+   initrd_addr_max = ULONG_MAX;
+   else
+   initrd_addr_max = real_mode->initrd_addr_max;
dbgprintf("initrd_addr_max is 0x%lx\n", initrd_addr_max);
}
 
@@ -81,8 +85,16 @@ void setup_linux_bootloader_parameters(
}
 
/* Ramdisk address and size */
-   real_mode->initrd_start = initrd_base;
-   real_mode->initrd_size  = initrd_size;
+   real_mode->initrd_start = initrd_base & 0xffffffffUL;
+   real_mode->initrd_size  = initrd_size & 0xffffffffUL;
+
+   if (real_mode->protocol_version >= 0x020c &&
+   (initrd_base & 0xffffffffUL) != initrd_base)
+   real_mode->ext_ramdisk_image = initrd_base >> 32;
+
+   if (real_mode->protocol_version >= 0x020c &&
+   (initrd_size & 0xffffffffUL) != initrd_size)
+   real_mode->ext_ramdisk_size = initrd_size >> 32;
 
/* The location of the command line */
/* if (real_mode_base == 0x9) { */
@@ -91,7 +103,12 @@ void setup_linux_bootloader_parameters(
/* setup_move_size */
/* } */
if (real_mode->protocol_version >= 0x0202) {
-   real_mode->cmd_line_ptr = real_mode_base + cmdline_offset;
+   unsigned long cmd_line_ptr = real_mode_base + cmdline_offset;
+
+   real_mode->cmd_line_ptr = cmd_line_ptr & 0xffffffffUL;
+   if ((real_mode->protocol_version >= 0x020c) &&
+   ((cmd_line_ptr & 0xffffffffUL) != cmd_line_ptr))
+   real_mode->ext_cmd_line_ptr = cmd_line_ptr >> 32;
}
 
/* Fill in the command line */
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v7 6/7] kexec, x86_64: Load bzImage64 above 4G

2013-01-24 Thread Yinghai Lu
Need to check xloadflags to see whether the bzImage is 64-bit relocatable.

-v2: add kexec-bzImage64.c according to Eric.
-v3: no need to keep purgatory under 2G after Eric's change to the purgatory code.
-v4: use locate_hole find position first then add_buffer... suggested by Eric
 add buffer for kernel image at last to make kexec-load faster.
 use xloadflags in setup_header to tell if is bzImage64.
 remove not cross GB boundary searching.
 add --entry-32bit and --real-mode for skipping bzImage64.
-v5: add buffer with runtime size instead, so kernel could use BRK
 early and safely.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  312 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 5 files changed, 322 insertions(+), 1 deletion(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

diff --git a/kexec/arch/i386/include/arch/options.h 
b/kexec/arch/i386/include/arch/options.h
index 89dbd26..aaac731 100644
--- a/kexec/arch/i386/include/arch/options.h
+++ b/kexec/arch/i386/include/arch/options.h
@@ -29,6 +29,7 @@
 #define OPT_MOD        (OPT_ARCH_MAX+7)
 #define OPT_VGA        (OPT_ARCH_MAX+8)
 #define OPT_REAL_MODE  (OPT_ARCH_MAX+9)
+#define OPT_ENTRY_32BIT    (OPT_ARCH_MAX+10)
 
 /* Options relevant to the architecture (excluding loader-specific ones): */
 #define KEXEC_ARCH_OPTIONS \
@@ -68,7 +69,8 @@
{ "args-linux", 0, NULL, OPT_ARGS_LINUX },  \
{ "args-none",  0, NULL, OPT_ARGS_NONE },   \
{ "module", 1, 0, OPT_MOD },\
-   { "real-mode",  0, NULL, OPT_REAL_MODE },
+   { "real-mode",  0, NULL, OPT_REAL_MODE },   \
+   { "entry-32bit",0, NULL, OPT_ENTRY_32BIT },
 
 #define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR
 
diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile
index 405bdf5..1cf10f9 100644
--- a/kexec/arch/x86_64/Makefile
+++ b/kexec/arch/x86_64/Makefile
@@ -13,6 +13,7 @@ x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c
 x86_64_KEXEC_SRCS_native =  kexec/arch/x86_64/kexec-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c
+x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c
 
 x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native)
 
diff --git a/kexec/arch/x86_64/kexec-bzImage64.c 
b/kexec/arch/x86_64/kexec-bzImage64.c
new file mode 100644
index 000..e2b2412
--- /dev/null
+++ b/kexec/arch/x86_64/kexec-bzImage64.c
@@ -0,0 +1,312 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003-2010  Eric Biederman (ebied...@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define _GNU_SOURCE
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <elf.h>
+#include <boot/elf_boot.h>
+#include <ip_checksum.h>
+#include <x86/x86-linux.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-syscall.h"
+#include "kexec-x86_64.h"
+#include "../i386/x86-linux-setup.h"
+#include "../i386/crashdump-x86.h"
+#include <arch/options.h>
+
+static const int probe_debug = 0;
+
+int bzImage64_probe(const char *buf, off_t len)
+{
+   const struct x86_linux_header *header;
+
+   if ((uintmax_t)len < (uintmax_t)(2 * 512)) {
+   if (probe_debug)
+   fprintf(stderr, "File is too short to be a bzImage!\n");
+   return -1;
+   }
+   header = (const struct x86_linux_header *)buf;
+   if (memcmp(header->header_magic, "HdrS", 4) != 0) {
+   if (probe_debug)
+   fprintf(stderr, "Not a bzImage\n");
+   return -1;
+   }
+   if (header->boot_sector_magic != 0xAA55) {
+   if (probe_debug)
+   fprintf(stderr, "No x86 boot sector present\n");
+   /* No x86 boot sector present */
+   return -1;
+   }
+   if (header->protocol_version < 0x020C) {
+   if (probe_debug

[PATCH v7 5/7] kexec, x86: set booloader id in setup_header

2013-01-24 Thread Yinghai Lu
set LOADER_TYPE_KEXEC

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 include/x86/x86-linux.h   |1 +
 kexec/arch/i386/x86-linux-setup.c |2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index 50c2595..0949dc2 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -146,6 +146,7 @@ struct x86_linux_param_header {
 #define LOADER_TYPE_BOOTSECT_LOADER 2
 #define LOADER_TYPE_SYSLINUX        3
 #define LOADER_TYPE_ETHERBOOT   4
+#define LOADER_TYPE_KEXEC   0x0D
 #define LOADER_TYPE_UNKNOWN 0xFF
uint8_t  loader_flags;  /* 0x211 */
uint8_t  reserved12[2]; /* 0x212 */
diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index eb8b794..c72773c 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -56,7 +56,7 @@ void setup_linux_bootloader_parameters(
unsigned long initrd_base, initrd_addr_max;
 
/* Say I'm a boot loader */
-   real_mode->loader_type = LOADER_TYPE_UNKNOWN;
+   real_mode->loader_type = LOADER_TYPE_KEXEC << 4;
 
/* No loader flags */
real_mode->loader_flags = 0;
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v7 2/7] kexec, x86: clean boot_params area for entry-32bit path

2013-01-24 Thread Yinghai Lu
The kexec bzImage path shares its setup data with the real-mode path, so
setup_header is copied together with the setup code.
Later, the 32-bit entry path just uses the whole area as boot_params for real_mode_data,
but the parts of boot_params around setup_header are
not cleared, which leaves some boot_params fields with
non-zero values.

So clear the area around setup_header for the non-real-mode entry path.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c |   16 
 1 file changed, 16 insertions(+)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index 0605909..4cc394d 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -98,6 +98,19 @@ void bzImage_usage(void)

 }
 
+static void clean_boot_params(unsigned char *real_mode, unsigned long size)
+{
+   unsigned long end;
+
+   /* clear value before header */
+   memset(real_mode, 0, 0x1f1);
+   /* clear value after setup_header  */
+   end = *(real_mode + 0x201);
+   end += 0x202;
+   if (end < size)
+   memset(real_mode + end, 0, size - end);
+}
+
 int do_bzImage_load(struct kexec_info *info,
const char *kernel, off_t kernel_len,
const char *command_line, off_t command_line_len,
@@ -212,6 +225,9 @@ int do_bzImage_load(struct kexec_info *info,
setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
memcpy(real_mode, kernel, kern16_size);
+   if (!real_mode_entry)
+   clean_boot_params((unsigned char *)real_mode, kern16_size);
+   real_mode->xloadflags &= ~(1<<0); /* clear CAN_BE_LOADED_ABOVE_4G */
 
if (info->kexec_flags & (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
/* If using bzImage for capture kernel, then we will not be
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v7 7/7] kexec, x86: handle Crash low kernel range

2013-01-24 Thread Yinghai Lu
The kernel may report a "Crash kernel low" range in /proc/iomem; use it in the kdump kernel
for DMA32.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/crashdump-x86.c |   21 +
 1 file changed, 21 insertions(+)

diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c
index 245402c..83bff5e 100644
--- a/kexec/arch/i386/crashdump-x86.c
+++ b/kexec/arch/i386/crashdump-x86.c
@@ -188,6 +188,8 @@ static struct memory_range 
crash_memory_range[CRASH_MAX_MEMORY_RANGES];
 
 /* Memory region reserved for storing panic kernel and other data. */
 static struct memory_range crash_reserved_mem;
+/* under 4G parts */
+static struct memory_range crash_reserved_low_mem;
 
 /* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to
  * create Elf headers. Keeping it separate from get_memory_ranges() as
@@ -282,6 +284,10 @@ static int get_crash_memory_ranges(struct memory_range 
**range, int *ranges,
if (exclude_region(&memory_ranges, crash_reserved_mem.start,
crash_reserved_mem.end) < 0)
return -1;
+   if (crash_reserved_low_mem.start &&
+   exclude_region(&memory_ranges, crash_reserved_low_mem.start,
+   crash_reserved_low_mem.end) < 0)
+   return -1;
if (gart) {
/* exclude GART region if the system has one */
if (exclude_region(&memory_ranges, gart_start, gart_end) < 0)
@@ -984,6 +990,12 @@ int load_crashdump_segments(struct kexec_info *info, char* 
mod_cmdline,
return ENOCRASHKERNEL;
}
 
+   if (crash_reserved_low_mem.start) {
+   sz = crash_reserved_low_mem.end - crash_reserved_low_mem.start
++1;
+   add_memmap(memmap_p, crash_reserved_low_mem.start, sz);
+   }
+
/* Create a backup region segment to store backup data*/
if (!(info->kexec_flags & KEXEC_PRESERVE_CONTEXT)) {
sz = (info->backup_src_size + align) & ~(align - 1);
@@ -1059,5 +1071,14 @@ int is_crashkernel_mem_reserved(void)
crash_reserved_mem.end = end;
crash_reserved_mem.type = RANGE_RAM;
 
+   /* If there is no Crash low kernel, still can go on */
+   if (parse_iomem_single("Crash kernel low\n", &start, &end) ||
+   start == end)
+   return 1;
+
+   crash_reserved_low_mem.start = start;
+   crash_reserved_low_mem.end = end;
+   crash_reserved_low_mem.type = RANGE_RAM;
+
return 1;
 }
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v7 0/7] kexec: put bzImage and ramdisk above 4G for x86 64bit

2013-01-24 Thread Yinghai Lu
Currently the kdump reservation is limited to below 896M, because kexec has that limitation,
and the bzImage also needs to stay under 4G.

kernel parts changes could be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git 
for-x86-boot

These patches are for kexec-tools to load bzImage and ramdisk above 4G
according to the newly added boot header fields.

-v3: address review from Eric to use locate_hole at first.
 use xloadflags instead.
-v4: remove the restriction about bzImage not crossing GB boundary.
 add real-mode fix for bzImage.
 add --entry-32bit and --real-mode for skip bzImage64.
-v5: use USE_EXT_BOOT_PARAMS bit in xloadflags.
-v6: use sentinel instead of USE_EXT_BOOT_PARAMS.
 add crashkernel_low support
-v7: Separate bootloader id setting in another patch

Yinghai Lu (7):
  kexec, x86: add boot header member for version 2.12
  kexec, x86: clean boot_params area for entry-32bit path
  kexec, x86: Fix bzImage real-mode booting
  kexec, x86: put ramdisk/cmd_line above 4G for 64bit bzImage
  kexec, x86: set booloader id in setup_header
  kexec, x86_64: Load bzImage64 above 4G
  kexec, x86: handle Crash low kernel range

 include/x86/x86-linux.h|   27 ++-
 kexec/arch/i386/crashdump-x86.c|   21 +++
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/i386/kexec-bzImage.c|   79 ++--
 kexec/arch/i386/x86-linux-setup.c  |   27 ++-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  312 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 9 files changed, 453 insertions(+), 24 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v7 3/7] kexec, x86: Fix bzImage real-mode booting

2013-01-24 Thread Yinghai Lu
We need to keep space for the bss and heap/stack before the command line;
otherwise the command line will be cleared by the kernel's 16-bit init code.

We also need to set the 32-bit start address in the real_mode header, because the kernel's 16-bit code
needs to jump there.

Also, don't touch regs16 if --real-mode is not specified.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c |   63 +++
 1 file changed, 50 insertions(+), 13 deletions(-)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index 4cc394d..1a33d47 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -130,6 +130,8 @@ int do_bzImage_load(struct kexec_info *info,
unsigned long kernel32_load_addr;
char *modified_cmdline;
unsigned long cmdline_end;
+   unsigned long kern16_size_needed;
+   unsigned long heap_size = 0;
 
/*
 * Find out about the file I am about to load.
@@ -221,9 +223,31 @@ int do_bzImage_load(struct kexec_info *info,
elf_rel_build_load(info, &info->rhdr, purgatory, purgatory_size,
0x3000, 640*1024, -1, 0);
dbgprintf("Loaded purgatory at addr 0x%lx\n", info->rhdr.rel_addr);
+
/* The argument/parameter segment */
-   setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
+   if (real_mode_entry) {
+   /* need to include size for bss and heap etc */
+   if (setup_header.protocol_version >= 0x0201)
+   kern16_size_needed = setup_header.heap_end_ptr;
+   else
+   kern16_size_needed = kern16_size + 8192; /* bss */
+   if (kern16_size_needed < kern16_size)
+   kern16_size_needed = kern16_size;
+   if (kern16_size_needed > 0xfffc)
+   die("kern16_size_needed is more then 64k\n");
+   heap_size = 0xfffc - kern16_size_needed; /* less 64k */
+   heap_size &= ~(0x200 - 1);
+   kern16_size_needed += heap_size;
+   } else {
+   kern16_size_needed = kern16_size;
+   /* need to bigger than size of struct bootparams */
+   if (kern16_size_needed < 4096)
+   kern16_size_needed = 4096;
+   }
+   setup_size = kern16_size_needed + command_line_len +
+PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
+   memset(real_mode, 0, setup_size);
memcpy(real_mode, kernel, kern16_size);
if (!real_mode_entry)
clean_boot_params((unsigned char *)real_mode, kern16_size);
@@ -279,11 +303,18 @@ int do_bzImage_load(struct kexec_info *info,
 
/* Tell the kernel what is going on */
setup_linux_bootloader_parameters(info, real_mode, setup_base,
-   kern16_size, command_line, command_line_len,
+   kern16_size_needed, command_line, command_line_len,
initrd, initrd_len);
 
+   if (real_mode_entry && real_mode->protocol_version >= 0x0201) {
+   real_mode->loader_flags |= 0x80; /* CAN_USE_HEAP */
+   real_mode->heap_end_ptr += heap_size - 0x200; /*stack*/
+   }
+
/* Get the initial register values */
-   elf_rel_get_symbol(&info->rhdr, "entry16_regs", &regs16, 
sizeof(regs16));
+   if (real_mode_entry)
+   elf_rel_get_symbol(&info->rhdr, "entry16_regs",
+&regs16, sizeof(regs16));
elf_rel_get_symbol(&info->rhdr, "entry32_regs", &regs32, 
sizeof(regs32));
/*
 
@@ -302,16 +333,18 @@ int do_bzImage_load(struct kexec_info *info,
/*
 * Initialize the 16bit start information.
 */
-   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base >> 4;
-   regs16.cs = regs16.ds + 0x20;
-   regs16.ip = 0;
-   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
-   regs16.ss = (elf_rel_get_addr(&info->rhdr, "stack_end") - 64*1024) >> 4;
-   /* XXX: Documentation/i386/boot.txt says 'sp' must equal heap_end */
-   regs16.esp = 0xFFFC;
if (real_mode_entry) {
+   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base >> 4;
+   regs16.cs = regs16.ds + 0x20;
+   regs16.ip = 0;
+   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
+   regs16.ss = (elf_rel_get_addr(&info->rhdr, "stack_end") - 
64*1024) >> 4;
+   /* XXX: Documentation/i386/boot.txt says 'sp' must equal 
heap_end */
+   regs16.esp = 0xFFFC;
+
printf("Starting the kernel in real mode\n");
regs32.eip = elf_rel_get_addr(&info->rhdr, "entry16");
+   real_mode->kernel_start = kernel32_load_addr;
}
if (real_mode_entry && kexec_debug) {
unsigned long entry16_debug, pre32, first32;
@@ -331,10 +364,14 @@ int do_bzImage_load(struct kexec_info *info

[PATCH v7 1/7] kexec, x86: add boot header member for version 2.12

2013-01-24 Thread Yinghai Lu
These fields (ext_ramdisk_image/size and xloadflags) will be used to put
the ramdisk and bzImage high on 64-bit.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 include/x86/x86-linux.h |   26 +-
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index 8f7a797..50c2595 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -32,7 +32,7 @@ struct drive_info_struct {
 };
 struct sys_desc_table {
uint16_t length;
-   uint8_t  table[318];
+   uint8_t  table[30];
 };
 
 struct apm_bios_info {
@@ -112,6 +112,10 @@ struct x86_linux_param_header {
struct apm_bios_info apm_bios_info; /* 0x40 */
struct drive_info_struct drive_info;/* 0x80 */
struct sys_desc_table sys_desc_table;   /* 0xa0 */
+   uint32_t ext_ramdisk_image; /* 0xc0 */
+   uint32_t ext_ramdisk_size;  /* 0xc4 */
+   uint32_t ext_cmd_line_ptr;  /* 0xc8 */
+   uint8_t reserved4_1[0x1e0 - 0xcc];  /* 0xcc */
uint32_t alt_mem_k; /* 0x1e0 */
uint8_t  reserved5[4];  /* 0x1e4 */
uint8_t  e820_map_nr;   /* 0x1e8 */
@@ -174,11 +178,18 @@ struct x86_linux_param_header {
/* 2.04+ */
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved15[3]; /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
-   uint8_t  reserved16[0x290 - 0x248]; /* 0x248 */
+   uint32_t payload_offset;/* 0x248 */
+   uint32_t payload_length;/* 0x24C */
+   uint64_t setup_data;/* 0x250 */
+   uint64_t pref_address;  /* 0x258 */
+   uint32_t init_size; /* 0x260 */
+   uint32_t handover_offset;   /* 0x264 */
+   uint8_t  reserved16[0x290 - 0x268]; /* 0x268 */
uint32_t edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];   /* 0x290 */
 #endif
struct  e820entry e820_map[E820MAX];/* 0x2d0 */
@@ -195,7 +206,11 @@ struct x86_linux_faked_param_header {
 };
 
 struct x86_linux_header {
-   uint8_t  reserved1[0x1f1];  /* 0x000 */
+   uint8_t  reserved1[0xc0];   /* 0x000 */
+   uint32_t ext_ramdisk_image; /* 0x0c0 */
+   uint32_t ext_ramdisk_size;  /* 0x0c4 */
+   uint32_t ext_cmd_line_ptr;  /* 0x0c8 */
+   uint8_t  reserved1_1[0x1f1-0xcc];   /* 0x0cc */
uint8_t  setup_sects;   /* 0x1f1 */
uint16_t root_flags;/* 0x1f2 */
uint32_t syssize;   /* 0x1f4 */
@@ -228,7 +243,8 @@ struct x86_linux_header {
 
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved6[3];  /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 2/2] x86 e820: Introduce memmap=resetusablemap for kdump usage

2013-01-24 Thread Yinghai Lu
On Tue, Jan 22, 2013 at 12:06 PM, Yinghai Lu ying...@kernel.org wrote:
 On Tue, Jan 22, 2013 at 8:32 AM, H. Peter Anvin h...@zytor.com wrote:
 Again: Please explain what is bad with this solution.
 I cannot see a better and more robust way for kdump other than
 reserving the original reserved memory areas as declared by the BIOS.

 It is bad because it creates more complexity than is needed.

 The whole point is that what we want is simply to switch type 1 to type
 X, with the sole exceptions being the areas explicitly reserved for the
 kdump kernel.

 Do you prefer to  reserveram way in attached patch?

Hi, Thomas,

Can you please check attached reserveram version on your setup?

If it is ok, i will put it in for-x86-boot patchset and send it to
Peter for v3.9.

Thanks

Yinghai


e820_reserveram_v2.patch
Description: Binary data
___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/2] x86 e820: Check for exactmap appearance when parsing first memmap option

2013-01-22 Thread Yinghai Lu
On Tue, Jan 22, 2013 at 7:20 AM, Thomas Renninger tr...@suse.de wrote:

 memmap=exactmap will throw away all original, but also until then
 user defined (through other provided memmap= parameters) areas.
 That means all memmap= boot parameters passed before a memmap=exactmap
 parameter are not recognized.
 Without this fix:
 memmap=x@y memmap=exactmap memmap=i#k
 only i#k would get recognized.

 This is wrong, this fix will only throw away all original e820 areas once
 when memmap=exactmap is found in the whole boot command line and before
 any other memmap= option is parsed.

Actually I put this patch already in for-x86-boot branch.

http://git.kernel.org/?p=linux/kernel/git/yinghai/linux-yinghai.git;a=commit;h=bd40ee79abf2351109ebd469b0b9ba6a8f3b0872

And will update change log to following to merge two change log.

---
Subject: [PATCH] x86: Handle multiple exactmaps and out of order exactmap

Current code expects that we only have one exactmap and that exactmap needs to
be the first one in memmap=.

memmap=exactmap will throw away all original entries, but also until
then user defined (through other provided memmap= parameters) areas.
That means all memmap= boot parameters passed before a memmap=exactmap
parameter are not recognized.

Without this fix:
memmap=x@y memmap=exactmap memmap=i#k
only i#k would get recognized.  This is wrong.

This fix will scan the boot_command_line to find if there is exactmap at
first and only throw away all original e820 entries once, and then parse
other memmap= option.

-v2: incorporate change log from Thomas Renninger.

Signed-off-by: Yinghai Lu ying...@kernel.org
Reviewed-by: Thomas Renninger tr...@suse.de

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86 e820: only void usable memory areas in memmap=exactmap case

2013-01-14 Thread Yinghai Lu
On Mon, Jan 14, 2013 at 4:54 PM, Thomas Renninger tr...@suse.de wrote:
 On Monday, January 14, 2013 11:04:36 AM Yinghai Lu wrote:
 On Mon, Jan 14, 2013 at 7:05 AM, Thomas Renninger tr...@suse.de wrote:
  What is this for?:
  @@ -871,6 +879,11 @@ static int __init parse_memmap_one(char
 
  userdef = 1;
  if (*p == '@') {
 
  start_at = memparse(p+1, p);
 
  +   if (exactusablemap_parsed) {
  +   /* remove all range with other types */
  +   e820_remove_range(start_at, mem_size,
  +E820_RAM, 0);
  +   }
 
  e820_add_region(start_at, mem_size, E820_RAM);
 
  } else if (*p == '#') {
 
  start_at = memparse(p+1, p);

 remove all old ranges before adding E820_RAM, otherwise newly added E820
 ranges could be ignored.
 But this is intended?
 kexec must never request reserved memory to be used as ordinary E820_RAM
 by the kdump kernel.
 This also reverts what exactusablemap is all about:
 Keep all reserved memory ranges of the original BIOS map.

 Above would again wrongly remove the mmconf and other reserved regions
 if kexec passes memmap=exactuseablemap,x@y

 From what I can see the patch looks fine, but above part should
 simply be left out.

then, I would like to rename it to resetusablemap instead.

like attached.

Thanks

Yinghai


e820_resetusablemap.patch
Description: Binary data
___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86 e820: only void usable memory areas in memmap=exactmap case

2013-01-12 Thread Yinghai Lu
On Sat, Jan 12, 2013 at 3:31 AM, Thomas Renninger tr...@suse.de wrote:
 memmap=exactmap [KNL,X86] Enable setting of an exact
 -   E820 memory map, as specified by the user.
 -   Such memmap=exactmap lines can be constructed based on
 -   BIOS output or other requirements. See the 
 memmap=nn@ss
 -   option description.
 +   E820 usable memory map, as specified by the user.
 +   All unusable (reserved, ACPI, NVS,...) ranges from the
 +   original e820 table are preserved.
 +   But the usable memory regions from the original e820
 +   table are removed.
 +   This parameter is explicitly for kdump usage:
 +   The memory the kdump kernel is allowed to use must
 +   be passed via below memmap=nn[KMG]@ss[KMG] param.
 +   All reserved regions the kernel may use for 
 ioremapping
 +   and similar are still considered.
 +
 +   memmap=voidmap  [KNL,X86] Do not use any e820 ranges from BIOS or
 +   bootloader. Instead you have to pass regions via
 +   below memmap= options.

I would suggest to keep memmap=exactmap meaning not changed, and add
memmap=exactusablemap
instead.

kexec-tools could be updated to support exactusablemap with
kernelversion checking for kdump.

also we need to double check to make sure:
1. exactmap should override exactusablemap, even the out of order sequence.
2. when exactusablemap is used, not just remove old usable type range,
also need to remove overlapped range
with new usable range.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86 e820: only void usable memory areas in memmap=exactmap case

2013-01-11 Thread Yinghai Lu
On Fri, Jan 11, 2013 at 4:33 AM, Thomas Renninger tr...@suse.de wrote:
 yes, we have other user for debug  like simulating user memmap for some
 bugs.
 current problem for exactmap is that we don't scan that at first.
 attached patch could help that.

 Yep, this is what I would have come up as well or similar. I looked
 at it, but I had no time for doing it and trying out.

 You may want to add:
 Reviewed-by: Thomas Renninger tr...@suse.de
 if someone reposts.

ok, I will wrap it up, add a changelog, test it, and then post it
with my for-x86-boot.


Thomas

 ---
 x86 e820: only void usable memory areas in memmap=exactmap case

 All unusable (reserved, ACPI, ACPI NVS,...) areas have to be
 honored in kdump case.
 Otherwise ACPI parts will quickly run into trouble when trying
 to for example early_ioremap reserved areas which are not
 declared reserved in kdump kernel.
 mmconf area must also be a reserved mem region.
 ...

 Passing unusable memory via memmap= is a design flaw as
 this information is already (exactly for this purpose) passed
 via bootloader structure.
 In kdump case (when memmap=exactmap is passed), only void
 (do not use) usable memory regions from the passed e820 table
 and use memory areas defined via memmap=X@Y boot parameter instead.
 But do still use the unusable memory regions from the original e820
 table.

 Signed-off-by: Thomas Renninger tr...@suse.de

 ---
  arch/x86/kernel/e820.c |   19 ++-
  1 files changed, 18 insertions(+), 1 deletions(-)

 diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
 index dc0b9f0..ae2d657 100644
 --- a/arch/x86/kernel/e820.c
 +++ b/arch/x86/kernel/e820.c
 @@ -559,6 +559,19 @@ u64 __init e820_remove_range(u64 start, u64 size, 
 unsigned old_type,
 return real_removed_size;
  }

 +static void __init e820_remove_range_type(u32 type)
 +{
 +   int i;
 +
 +   for (i = 0; i < e820.nr_map; i++) {
 +   struct e820entry *ei = &e820.map[i];
 +   if (ei->type == type) {
 +   memset(ei, 0, sizeof(struct e820entry));
 +   continue;
 +   }
 +   }
 +}
 +
  void __init update_e820(void)
  {
 u32 nr_map;
 @@ -858,7 +871,11 @@ static int __init parse_memmap_one(char *p)
  */
 saved_max_pfn = e820_end_of_ram_pfn();
  #endif
 -   e820.nr_map = 0;
 +   /*
 +* Remove all usable memory (this is for kdump), usable
 +* memory will be passed via memmap=X@Y parameter
 +*/
 +   e820_remove_range_type(E820_RAM);

We may need to keep exactmap intact.

but could add another one like exact_ram_map
or extend to have memmap=exactmap=ram or etc.

 userdef = 1;
 return 0;
 }

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86 e820: only void usable memory areas in memmap=exactmap case

2013-01-11 Thread Yinghai Lu
On Fri, Jan 11, 2013 at 10:24 AM, Thomas Renninger tr...@suse.de wrote:
 We may need to keep exactmap intact.
 Why?
 Kexec/kdump should have been the only user?
 If older/current kexec calls still add ACPI maps via memmap=X#Y,
 they should already exist in the original e820 map and fall off or
 get glued to one region if (wrongly) overlapping via sanitize_map.

No, kexec/kdump is not the only user for memmap=exactmap.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] x86 e820: only void usable memory areas in memmap=exactmap case

2013-01-11 Thread Yinghai Lu
On Fri, Jan 11, 2013 at 12:06 PM, H. Peter Anvin h...@zytor.com wrote:
 On 01/11/2013 11:59 AM, Yinghai Lu wrote:
 On Fri, Jan 11, 2013 at 10:24 AM, Thomas Renninger tr...@suse.de wrote:
 We may need to keep exactmap intact.
 Why?
 Kexec/kdump should have been the only user?
 If older/current kexec calls still add ACPI maps via memmap=X#Y,
 they should already exist in the original e820 map and fall off or
 get glued to one region if (wrongly) overlapping via sanitize_map.

 No, kexec/kdump is not the only user for memmap=exactmap.


 Who is using it then, since you seem to know?

http://forums.gentoo.org/viewtopic-t-487476-highlight-proliant.html

http://forums.fedoraforum.org/archive/index.php/t-225347.html

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] Only reset e820 once, even with multiple memmap=exactmap params

2013-01-10 Thread Yinghai Lu
On Thu, Jan 10, 2013 at 6:26 AM, Vivek Goyal vgo...@redhat.com wrote:

 This happens only in case of kdump and not kexec. In case of kdump
 we want second kernel to use only selected memory areas.

 In fact this is one improvement area. Instead of using memmap= entries
 in kdump case, we should probably modify the e820 map passed in
 zero page and get rid of memmap= entries.

then how the kdump kernel get saved_max_pfn?

may have problem for read_oldmem with crashed kernel.

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] Only reset e820 once, even with multiple memmap=exactmap params

2013-01-10 Thread Yinghai Lu
On Thu, Jan 10, 2013 at 9:01 AM, Vivek Goyal vgo...@redhat.com wrote:
 On Thu, Jan 10, 2013 at 08:53:18AM -0800, Yinghai Lu wrote:
 On Thu, Jan 10, 2013 at 6:26 AM, Vivek Goyal vgo...@redhat.com wrote:
 
  This happens only in case of kdump and not kexec. In case of kdump
  we want second kernel to use only selected memory areas.
 
  In fact this is one improvement area. Instead of using memmap= entries
  in kdump case, we should probably modify the e820 map passed in
  zero page and get rid of memmap= entries.

 then how the kdump kernel get saved_max_pfn?


 Oh, I forgot about that. May be we can pass saved_max_pfn on command line
 instead of passing all memmap= entries.

then we create another kernel version/kexec tools dependency.

kexec tools need to check kernel version, to see if saved_max_pfn= is supported.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] Only reset e820 once, even with multiple memmap=exactmap params

2013-01-08 Thread Yinghai Lu
On Tue, Jan 8, 2013 at 8:47 AM, Thomas Renninger tr...@suse.de wrote:
 On Tuesday, January 08, 2013 04:04:56 AM Yinghai Lu wrote:
 On Mon, Jan 7, 2013 at 4:42 PM, Thomas Renninger tr...@suse.de wrote:
  memmap=256M$3584M

 may need to change to:

 memmap=256M\$\$3584M
 The problem is (beside the special char $) that
 memmap=exactmap boot param resets all e820 maps every time the
 parameter is processed.
 And:
 /sbin/kexec -p xy --append=... --initrd yx
 seem to magically add (append):
 memmap=exactmap memmap=640K@0K memmap=392556K@115328K elfcorehdr=507884K 
 memmap=252K#3099760K

 therefore all memmap= I try to pass are voided out by:
 memmap=exactmap
 which is always added by kexec after my params.

 I could come around with attached patch and passing:
 /sbin/kexec -p xy --append='... memmap=exactmap memmap=256M$3584M' --initrd yx

 Now mmconfig is working in kdump kernel.
 This would mean mmconfig is broken by design in kexec?

 Only way to fix this I can think of is to export
 mmconfig area through /sys  (../kernel/debug/mmconfig?, possibly
 already in X$Y format?) in the productive kernel and make kexec add it
 like the other memmap= params automatically.

 I'll attach the output in a separate mail.

Thomas

 ---

 x86 e820: Do not reset e820 map twice, even if memmap=exactmap is passed as 
 boot param several times

 This is needed to be able to explicitly pass (debug) e820
 modifications through kexec via memmap=.
 Otherwise those get voided by kexec appending memmap=exactmap always
 after the user defined boot parameters.

 Signed-off-by: Thomas Renninger tr...@suse.de

  linux-2.6_t/arch/x86/kernel/e820.c |4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

 Index: git/linux-2.6_t/arch/x86/kernel/e820.c
 ===
 --- git.orig/linux-2.6_t/arch/x86/kernel/e820.c
 +++ git/linux-2.6_t/arch/x86/kernel/e820.c
 @@ -845,7 +845,9 @@ static int __init parse_memmap_opt(char

 if (!strncmp(p, "exactmap", 8)) {
  #ifdef CONFIG_CRASH_DUMP
 -   /*
 +   /* memmap=exactmap passed twice, do not reset tables again */
 +   if (saved_max_pfn)
 +   return 0;   /*
  * If we are doing a crash dump, we still need to know
  * the real mem size before original memory map is
  * reset.

that exactmap logic still has a problem:
We need to check for exactmap first, i.e. scan the whole command line to
see if exactmap is there, reset the e820 tables, and then handle
the other memmap options.

Also please update your patch after

tip/x86/mm2

I have one patch that process memmap= with , there.

http://git.kernel.org/?p=linux/kernel/git/tip/tip.git;a=commitdiff;h=9710f581bb4c35589ac046b0cfc0deb7f369fc85

We could put exactmap scanning in new parse_memmap_opt.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v7 0/5] Reset PCIe devices to address DMA problem on kdump with iommu

2013-01-07 Thread Yinghai Lu
On Mon, Jan 7, 2013 at 11:09 AM, Thomas Renninger tr...@suse.de wrote:
 e820: BIOS-provided physical RAM map:
 BIOS-e820: [mem 0x0100-0x0009bfff] usable
 BIOS-e820: [mem 0x0010-0xbd2e] usable
 BIOS-e820: [mem 0xbd2f-0xbd31bfff] reserved
 BIOS-e820: [mem 0xbd31c000-0xbd35afff] ACPI data
 BIOS-e820: [mem 0xbd35b000-0xbfff] reserved
 BIOS-e820: [mem 0xe000-0xefff] reserved
 BIOS-e820: [mem 0xfe00-0x] reserved
 BIOS-e820: [mem 0x0001-0x00603fff] usable
 debug: ignoring loglevel setting.
 e820: last_pfn = 0x604 max_arch_pfn = 0x4
 NX (Execute Disable) protection: active
 e820: user-defined physical RAM map:
 user: [mem 0x-0x0009] usable
 user: [mem 0x070a-0x1effafff] usable
 user: [mem 0xbd31c000-0xbd35afff] ACPI data

can you make sure kdump kernel command line take
memmap=256M$3584M ?

it will make mmconf working.

 megasas: 06.504.01.00-rc1 Mon. Oct. 1 17:00:00 PDT 2012

also need to append debug ignore_loglevel to see why the disks are not
probed.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v7 0/5] Reset PCIe devices to address DMA problem on kdump with iommu

2013-01-07 Thread Yinghai Lu
On Mon, Jan 7, 2013 at 4:42 PM, Thomas Renninger tr...@suse.de wrote:
 memmap=256M$3584M

may need to change to:

memmap=256M\$\$3584M

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v7 0/5] Reset PCIe devices to address DMA problem on kdump with iommu

2012-12-21 Thread Yinghai Lu
On Fri, Nov 30, 2012 at 7:49 AM, MUNEDA Takahiro
muneda.takah...@jp.fujitsu.com wrote:
 On Tue, 27 Nov 2012 09:42:20 +0900 (JST),
 Takao Indoh indou.ta...@jp.fujitsu.com wrote:

 These patches reset PCIe devices at boot time to address DMA problem on
 kdump with iommu. When reset_devices is specified, a hot reset is
 triggered on each PCIe root port and downstream port to reset its
 downstream endpoint.

ThomasR, Can you check if this one help your test case about interrupt ?

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v6 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit

2012-12-13 Thread Yinghai Lu
Currently kdump's reserved memory is limited to below 896M because kexec has
that limitation, and the bzImage also needs to stay under 4G.

kernel parts changes could be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git 
for-x86-boot

These patches are for kexec-tools to load the bzImage and ramdisk above 4G
according to the newly added boot header fields.

-v3: address review from Eric to use locate_hole at first.
 use xloadflags instead.
-v4: remove the restriction about bzImage not crossing GB boundary.
 add real-mode fix for bzImage.
 add --entry-32bit and --real-mode for skip bzImage64.
-v5: use USE_EXT_BOOT_PARAMS bit in xloadflags.
-v6: use sentinel instead of USE_EXT_BOOT_PARAMS.
 add crashkernel_low support

Yinghai Lu (6):
  kexec, x86: add boot header member for version 2.12
  kexec, x86: clean boot_params area for entry-32bit path
  kexec, x86: Fix bzImage real-mode booting
  kexec, x86: put ramdisk/cmd_line above 4G for 64bit bzImage
  kexec, x86_64: Load bzImage64 above 4G
  kexec, x86: handle Crash low kernel range

 include/x86/x86-linux.h|   33 +++-
 kexec/arch/i386/crashdump-x86.c|   21 +++
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/i386/kexec-bzImage.c|   81 +++--
 kexec/arch/i386/x86-linux-setup.c  |   27 ++-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  312 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 9 files changed, 461 insertions(+), 24 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

-- 
1.7.10.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec x86_64: Make purgatory relocatable anywhere in the 64bit address space.

2012-12-03 Thread Yinghai Lu
On Mon, Nov 19, 2012 at 8:56 AM, Eric W. Biederman
ebied...@xmission.com wrote:
 diff --git a/purgatory/Makefile b/purgatory/Makefile
 index ee1679c..e39adec 100644
 --- a/purgatory/Makefile
 +++ b/purgatory/Makefile
 @@ -64,6 +64,7 @@ $(PURGATORY): $(PURGATORY_OBJS)
 $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

  #  $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) --no-undefined -e purgatory_start 
 -r -o $@ $(PURGATORY_OBJS) $(UTIL_LIB)
 +   $(STRIP) --strip-debug $@

  echo::
 @echo PURGATORY_SRCS $(PURGATORY_SRCS)

looks like configure does not set
STRIP in top Makefile

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec: Teach configure to find the strip binary.

2012-12-03 Thread Yinghai Lu
On Mon, Dec 3, 2012 at 5:26 PM, Eric W. Biederman ebied...@xmission.com wrote:
 Yinghai Lu ying...@kernel.org writes:

 STRIP = strip

 Run bootstrap.  At the very least it looks like you haven't regenerated
 configure by running autoconf.

works. Thanks.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v5 0/5] kexec: put bzImage and ramdisk above 4G for x86 64bit

2012-12-03 Thread Yinghai Lu
On Mon, Dec 3, 2012 at 11:02 PM, H. Peter Anvin h...@zytor.com wrote:
 On 12/03/2012 09:47 PM, Simon Horman wrote:

 Hi Everyone,

 there was some healthy discussion around the previous versions
 of this series, but so far none on this version. Have we reached
 consensus?


 No, we haven't converged on the boot protocol yet and just respinning the
 patch set doesn't change that.

please check if you are ok with bit 15 in xloadflags.

+Field name: xloadflags
+Type:   modify (obligatory)
+Offset/size:0x236/2
+Protocol:   2.12+
+
+  This field is a bitmask.
+
+  Bit 0 (read): CAN_BE_LOADED_ABOVE_4G
+- If 1, kernel/boot_params/cmdline/ramdisk can be above 4g,
+
+  Bit 15 (write): USE_EXT_BOOT_PARAMS
+   - If 1, set by bootloader, and kernel could check new fields
+   in boot_params that are added from 2.12 safely.

Thanks

Yinghai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v5 1/5] kexec, x86: add boot header member for version 2.12

2012-11-28 Thread Yinghai Lu
will use ext_ramdisk_image/size, and xloadflags to put
ramdisk and bzImage high for 64bit.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 include/x86/x86-linux.h |   32 +++-
 1 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index 27af02b..a2452f2 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -32,7 +32,7 @@ struct drive_info_struct {
 };
 struct sys_desc_table {
uint16_t length;
-   uint8_t  table[318];
+   uint8_t  table[30];
 };
 
 struct apm_bios_info {
@@ -112,6 +112,10 @@ struct x86_linux_param_header {
struct apm_bios_info apm_bios_info; /* 0x40 */
struct drive_info_struct drive_info;/* 0x80 */
struct sys_desc_table sys_desc_table;   /* 0xa0 */
+   uint32_t ext_ramdisk_image; /* 0xc0 */
+   uint32_t ext_ramdisk_size;  /* 0xc4 */
+   uint32_t ext_cmd_line_ptr;  /* 0xc8 */
+   uint8_t reserved4_1[0x1e0 - 0xcc];  /* 0xcc */
uint32_t alt_mem_k; /* 0x1e0 */
uint8_t  reserved5[4];  /* 0x1e4 */
uint8_t  e820_map_nr;   /* 0x1e8 */
@@ -174,11 +178,18 @@ struct x86_linux_param_header {
/* 2.04+ */
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved15[3]; /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
-   uint8_t  reserved16[0x290 - 0x248]; /* 0x248 */
+   uint32_t payload_offset;/* 0x248 */
+   uint32_t payload_length;/* 0x24C */
+   uint64_t setup_data;/* 0x250 */
+   uint64_t pref_address;  /* 0x258 */
+   uint32_t init_size; /* 0x260 */
+   uint32_t handover_offset;   /* 0x264 */
+   uint8_t  reserved16[0x290 - 0x268]; /* 0x268 */
uint32_t edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];   /* 0x290 */
 #endif
struct  e820entry e820_map[E820MAX];/* 0x2d0 */
@@ -195,7 +206,11 @@ struct x86_linux_faked_param_header {
 };
 
 struct x86_linux_header {
-   uint8_t  reserved1[0x1f1];  /* 0x000 */
+   uint8_t  reserved1[0xc0];   /* 0x000 */
+   uint32_t ext_ramdisk_image; /* 0x0c0 */
+   uint32_t ext_ramdisk_size;  /* 0x0c4 */
+   uint32_t ext_cmd_line_ptr;  /* 0x0c8 */
+   uint8_t  reserved1_1[0x1f1-0xcc];   /* 0x0cc */
uint8_t  setup_sects;   /* 0x1f1 */
uint16_t root_flags;/* 0x1f2 */
uint16_t syssize;   /* 0x1f4 */
@@ -241,10 +256,17 @@ struct x86_linux_header {
 #else
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved6[3];  /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
+   uint32_t payload_offset;/* 0x248 */
+   uint32_t payload_length;/* 0x24C */
+   uint64_t setup_data;/* 0x250 */
+   uint64_t pref_address;  /* 0x258 */
+   uint32_t init_size; /* 0x260 */
+   uint32_t handover_offset;   /* 0x264 */
 #endif
 } PACKED;
 
-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v5 5/5] kexec, x86_64: Load bzImage64 above 4G

2012-11-28 Thread Yinghai Lu
Need to check xloadflags to see whether the bzImage is 64-bit relocatable.

-v2: add kexec-bzImage64.c according to Eric.
-v3: don't need to put purgatory under 2g after Eric's change to purgatory code.
-v4: use locate_hole find position first then add_buffer... suggested by Eric
 add buffer for kernel image at last to make kexec-load faster.
 use xloadflags in setup_header to tell if is bzImage64.
 remove not cross GB boundary searching.
 add --entry-32bit and --real-mode for skipping bzImage64.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  335 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 5 files changed, 345 insertions(+), 1 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

diff --git a/kexec/arch/i386/include/arch/options.h 
b/kexec/arch/i386/include/arch/options.h
index 89dbd26..aaac731 100644
--- a/kexec/arch/i386/include/arch/options.h
+++ b/kexec/arch/i386/include/arch/options.h
@@ -29,6 +29,7 @@
 #define OPT_MOD(OPT_ARCH_MAX+7)
 #define OPT_VGA(OPT_ARCH_MAX+8)
 #define OPT_REAL_MODE  (OPT_ARCH_MAX+9)
+#define OPT_ENTRY_32BIT(OPT_ARCH_MAX+10)
 
 /* Options relevant to the architecture (excluding loader-specific ones): */
 #define KEXEC_ARCH_OPTIONS \
@@ -68,7 +69,8 @@
{ "args-linux", 0, NULL, OPT_ARGS_LINUX },  \
{ "args-none",  0, NULL, OPT_ARGS_NONE },   \
{ "module", 1, 0, OPT_MOD },\
-   { "real-mode",  0, NULL, OPT_REAL_MODE },
+   { "real-mode",  0, NULL, OPT_REAL_MODE },   \
+   { "entry-32bit",0, NULL, OPT_ENTRY_32BIT },
 
 #define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR
 
diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile
index 405bdf5..1cf10f9 100644
--- a/kexec/arch/x86_64/Makefile
+++ b/kexec/arch/x86_64/Makefile
@@ -13,6 +13,7 @@ x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c
 x86_64_KEXEC_SRCS_native =  kexec/arch/x86_64/kexec-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c
+x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c
 
 x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native)
 
diff --git a/kexec/arch/x86_64/kexec-bzImage64.c 
b/kexec/arch/x86_64/kexec-bzImage64.c
new file mode 100644
index 000..9442a65
--- /dev/null
+++ b/kexec/arch/x86_64/kexec-bzImage64.c
@@ -0,0 +1,335 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003-2010  Eric Biederman (ebied...@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define _GNU_SOURCE
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <elf.h>
+#include <boot/elf_boot.h>
+#include <ip_checksum.h>
+#include <x86/x86-linux.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-syscall.h"
+#include "kexec-x86_64.h"
+#include "../i386/x86-linux-setup.h"
+#include "../i386/crashdump-x86.h"
+#include <arch/options.h>
+
+static const int probe_debug = 0;
+
+int bzImage64_probe(const char *buf, off_t len)
+{
+   const struct x86_linux_header *header;
+
+   if ((uintmax_t)len < (uintmax_t)(2 * 512)) {
+   if (probe_debug)
+   fprintf(stderr, "File is too short to be a bzImage!\n");
+   return -1;
+   }
+   header = (const struct x86_linux_header *)buf;
+   if (memcmp(header->header_magic, "HdrS", 4) != 0) {
+   if (probe_debug)
+   fprintf(stderr, "Not a bzImage\n");
+   return -1;
+   }
+   if (header->boot_sector_magic != 0xAA55) {
+   if (probe_debug)
+   fprintf(stderr, "No x86 boot sector present\n");
+   /* No x86 boot sector present */
+   return -1;
+   }
+   if (header->protocol_version < 0x020C) {
+   if (probe_debug)
+   fprintf(stderr, Must be at least protocol version 
2.12\n

[PATCH v5 4/5] kexec, x86: put ramdisk/cmd_line above 4G for 64bit bzImage

2012-11-28 Thread Yinghai Lu
We can put the ramdisk/cmdline above 4G for a 64-bit bzImage with boot protocol 2.12.

-v2: change ext_... handling to the way that Eric likes.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/x86-linux-setup.c |   26 ++
 1 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index b7ab8ea..840bc88 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -64,7 +64,12 @@ void setup_linux_bootloader_parameters(
/* Find the maximum initial ramdisk address */
initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
if (real_mode->protocol_version >= 0x0203) {
-   initrd_addr_max = real_mode->initrd_addr_max;
+   if (real_mode->protocol_version >= 0x020c &&
+   real_mode->xloadflags & (1<<0) && /* CAN_BE_LOADED_ABOVE_4G */
+   real_mode->xloadflags & (1<<15)) /* USE_EXT_BOOT_PARAMS */
+   initrd_addr_max = ULONG_MAX;
+   else
+   initrd_addr_max = real_mode->initrd_addr_max;
dbgprintf("initrd_addr_max is 0x%lx\n", initrd_addr_max);
}
 
@@ -81,8 +86,16 @@ void setup_linux_bootloader_parameters(
}
 
/* Ramdisk address and size */
-   real_mode->initrd_start = initrd_base;
-   real_mode->initrd_size  = initrd_size;
+   real_mode->initrd_start = initrd_base & 0xffffffffUL;
+   real_mode->initrd_size  = initrd_size & 0xffffffffUL;
+
+   if (real_mode->protocol_version >= 0x020c &&
+   (initrd_base & 0xffffffffUL) != initrd_base)
+   real_mode->ext_ramdisk_image = initrd_base >> 32;
+
+   if (real_mode->protocol_version >= 0x020c &&
+   (initrd_size & 0xffffffffUL) != initrd_size)
+   real_mode->ext_ramdisk_size = initrd_size >> 32;
 
/* The location of the command line */
/* if (real_mode_base == 0x9) { */
@@ -91,7 +104,12 @@ void setup_linux_bootloader_parameters(
/* setup_move_size */
/* } */
if (real_mode-protocol_version = 0x0202) {
-   real_mode-cmd_line_ptr = real_mode_base + cmdline_offset;
+   unsigned long cmd_line_ptr = real_mode_base + cmdline_offset;
+
+   real_mode-cmd_line_ptr = cmd_line_ptr  0xUL;
+   if ((real_mode-protocol_version = 0x020c) 
+   ((cmd_line_ptr  0xUL) != cmd_line_ptr))
+   real_mode-ext_cmd_line_ptr = cmd_line_ptr  32;
}
 
/* Fill in the command line */
-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v5 0/5] kexec: put bzImage and ramdisk above 4G for x86 64bit

2012-11-28 Thread Yinghai Lu
Currently the kdump reserved region is limited to below 896M, because kexec has that limitation,
and the bzImage also needs to stay under 4G.

kernel parts changes could be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git 
for-x86-boot

These patches are for kexec-tools to load bzImage and ramdisk above 4G
according to the newly added boot header fields.

-v3: address review from Eric to use locate_hole at first.
 use xloadflags instead.
-v4: remove the restriction about bzImage not crossing GB boundary.
 add real-mode fix for bzImage.
 add --entry-32bit and --real-mode for skip bzImage64.
-v5: use USE_EXT_BOOT_PARAMS bit in xloadflags.

Yinghai Lu (5):
  kexec, x86: add boot header member for version 2.12
  kexec, x86: clean boot_params area for entry-32bit path
  kexec, x86: Fix bzImage real-mode booting
  kexec, x86: put ramdisk/cmd_line above 4G for 64bit bzImage
  kexec, x86_64: Load bzImage64 above 4G

 include/x86/x86-linux.h|   32 +++-
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/i386/kexec-bzImage.c|   80 +++--
 kexec/arch/i386/x86-linux-setup.c  |   26 ++-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  335 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 8 files changed, 462 insertions(+), 22 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v5 3/5] kexec, x86: Fix bzImage real-mode booting

2012-11-28 Thread Yinghai Lu
We need to keep space for bss and heap/stack before the command line;
otherwise command_line will be cleared by the kernel's 16bit init code.

We also need to set the 32bit start address in the real_mode header, because
the kernel's 16bit code needs to jump there.

Also don't touch regs16 if --real-mode is not specified.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c |   63 +++---
 1 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index fe51ab4..4d85813 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -130,6 +130,8 @@ int do_bzImage_load(struct kexec_info *info,
unsigned long kernel32_load_addr;
char *modified_cmdline;
unsigned long cmdline_end;
+   unsigned long kern16_size_needed;
+   unsigned long heap_size = 0;
 
/*
 * Find out about the file I am about to load.
@@ -221,9 +223,31 @@ int do_bzImage_load(struct kexec_info *info,
elf_rel_build_load(info, info-rhdr, purgatory, purgatory_size,
0x3000, 640*1024, -1, 0);
dbgprintf(Loaded purgatory at addr 0x%lx\n, info-rhdr.rel_addr);
+
/* The argument/parameter segment */
-   setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
+   if (real_mode_entry) {
+   /* need to include size for bss and heap etc */
+   if (setup_header.protocol_version = 0x0201)
+   kern16_size_needed = setup_header.heap_end_ptr;
+   else
+   kern16_size_needed = kern16_size + 8192; /* bss */
+   if (kern16_size_needed  kern16_size)
+   kern16_size_needed = kern16_size;
+   if (kern16_size_needed  0xfffc)
+   die(kern16_size_needed is more then 64k\n);
+   heap_size = 0xfffc - kern16_size_needed; /* less 64k */
+   heap_size = ~(0x200 - 1);
+   kern16_size_needed += heap_size;
+   } else {
+   kern16_size_needed = kern16_size;
+   /* need to bigger than size of struct bootparams */
+   if (kern16_size_needed  4096)
+   kern16_size_needed = 4096;
+   }
+   setup_size = kern16_size_needed + command_line_len +
+PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
+   memset(real_mode, 0, setup_size);
memcpy(real_mode, kernel, kern16_size);
if (!real_mode_entry) {
clean_boot_params((unsigned char *)real_mode, kern16_size);
@@ -282,11 +306,20 @@ int do_bzImage_load(struct kexec_info *info,
 
/* Tell the kernel what is going on */
setup_linux_bootloader_parameters(info, real_mode, setup_base,
-   kern16_size, command_line, command_line_len,
+   kern16_size_needed, command_line, command_line_len,
initrd, initrd_len);
 
+   if (real_mode_entry) {
+   /* restore can use heap and load high */
+   real_mode-loader_flags = 0x81;
+   if (real_mode-protocol_version = 0x0201)
+   real_mode-heap_end_ptr += heap_size - 0x200; /*stack*/
+   }
+
/* Get the initial register values */
-   elf_rel_get_symbol(info-rhdr, entry16_regs, regs16, 
sizeof(regs16));
+   if (real_mode_entry)
+   elf_rel_get_symbol(info-rhdr, entry16_regs,
+regs16, sizeof(regs16));
elf_rel_get_symbol(info-rhdr, entry32_regs, regs32, 
sizeof(regs32));
/*
 
@@ -305,16 +338,18 @@ int do_bzImage_load(struct kexec_info *info,
/*
 * Initialize the 16bit start information.
 */
-   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base  4;
-   regs16.cs = regs16.ds + 0x20;
-   regs16.ip = 0;
-   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
-   regs16.ss = (elf_rel_get_addr(info-rhdr, stack_end) - 64*1024)  4;
-   /* XXX: Documentation/i386/boot.txt says 'sp' must equal heap_end */
-   regs16.esp = 0xFFFC;
if (real_mode_entry) {
+   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base  4;
+   regs16.cs = regs16.ds + 0x20;
+   regs16.ip = 0;
+   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
+   regs16.ss = (elf_rel_get_addr(info-rhdr, stack_end) - 
64*1024)  4;
+   /* XXX: Documentation/i386/boot.txt says 'sp' must equal 
heap_end */
+   regs16.esp = 0xFFFC;
+
printf(Starting the kernel in real mode\n);
regs32.eip = elf_rel_get_addr(info-rhdr, entry16);
+   real_mode-kernel_start = kernel32_load_addr;
}
if (real_mode_entry  kexec_debug) {
unsigned long entry16_debug, pre32, first32

Re: [PATCH v3 4/4] kexec, x86_64: Load bzImage64 above 4G

2012-11-25 Thread Yinghai Lu
On Wed, Nov 21, 2012 at 3:34 PM, H. Peter Anvin h...@zytor.com wrote:

 Also will need another two spare pages for cross 512G boundary.


 Doesn't seem like a problem.

 Let me be blunt: either we do it right or we don't do it at all.

OK, please check the version that covers crossing the GB boundary.

it is about 100 lines more than the version that does not handle crossing.

test on cross 1G, 5G, 512G, 513G.

Thanks

Yinghai


kernel_pgt_level3_spare.patch
Description: Binary data
___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v4 6/6] kexec, x86_64: Load bzImage64 above 4G

2012-11-25 Thread Yinghai Lu
On Sat, Nov 24, 2012 at 12:47 PM, Yinghai Lu ying...@kernel.org wrote:
 need to check xloadflags to see the bzImage is for 64bit relocatable.

 -v2: add kexec-bzImage64.c according to Eric.
 -v3: don't need to purgatory under 2g after Eric's change to purgatory code.
 -v4: use locate_hole find position first then add_buffer... suggested by Eric
  add buffer for kernel image at last to make kexec-load faster.
  use xloadflags in setup_header to tell if is bzImage64.
  remove not cross GB boundary searching.
  add --entry-32bit and --real-mode for skipping bzImage64.

-v5: attached. that is using USE_EXT_BOOT_PARAMS bit in xloadflags


64bit_bzImage_start_v5.patch
Description: Binary data
___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v4 0/6] kexec: put bzImage and ramdisk above 4G for x86 64bit

2012-11-24 Thread Yinghai Lu
Currently the kdump reserved region is limited to below 896M, because kexec has that limitation,
and the bzImage also needs to stay under 4G.

kernel parts changes could be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git 
for-x86-boot

These patches are for kexec-tools to load bzImage and ramdisk high, according
to the newly added boot header fields.

-v3: address review from Eric to use locate_hole at first.
 use xloadflags instead.
-v4: remove the restriction about bzImage not crossing GB boundary.
 add real-mode fix for bzImage.
 add --entry-32bit and --real-mode for skip bzImage64.

Yinghai Lu (6):
  kexec, x86: clean boot_params area for entry-32bit path
  kexec, x86: Fix bzImage real-mode booting
  kexec, x86: add boot header member for version 2.12
  kexec, x86: put ramdisk high for 64bit bzImage
  kexec, x86: set ext_cmd_line_ptr when boot_param is above 4g
  kexec, x86_64: Load bzImage64 above 4G

 include/x86/x86-linux.h|   32 +++-
 kexec/arch/i386/include/arch/options.h |4 +-
 kexec/arch/i386/kexec-bzImage.c|   80 +++--
 kexec/arch/i386/x86-linux-setup.c  |   25 ++-
 kexec/arch/x86_64/Makefile |1 +
 kexec/arch/x86_64/kexec-bzImage64.c|  334 
 kexec/arch/x86_64/kexec-x86_64.c   |1 +
 kexec/arch/x86_64/kexec-x86_64.h   |5 +
 8 files changed, 460 insertions(+), 22 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v4 1/6] kexec, x86: clean boot_params area for entry-32bit path

2012-11-24 Thread Yinghai Lu
The kexec bzImage path shares its setup data with the real-mode path, and
setup_header is copied together with the setup code.
The 32bit entry path later uses the whole area as boot_params (real_mode_data),
but the parts of boot_params around setup_header are not cleared, which
leaves some boot_params fields with
non-zero values.

So clear the area around setup_header for the non-real-mode entry path.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c |   15 +++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index 6998587..80d09e7 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -98,6 +98,19 @@ void bzImage_usage(void)

 }
 
+static void clean_boot_params(unsigned char *real_mode, unsigned long size)
+{
+   unsigned long end;
+
+   /* clear value before header */
+   memset(real_mode, 0, 0x1f1);
+   /* clear value after setup_header  */
+   end = *(real_mode + 0x201);
+   end += 0x202;
+   if (end  size)
+   memset(real_mode + end, 0, size - end);
+}
+
 int do_bzImage_load(struct kexec_info *info,
const char *kernel, off_t kernel_len,
const char *command_line, off_t command_line_len,
@@ -212,6 +225,8 @@ int do_bzImage_load(struct kexec_info *info,
setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
memcpy(real_mode, kernel, kern16_size);
+   if (!real_mode_entry)
+   clean_boot_params((unsigned char *)real_mode, kern16_size);
 
if (info-kexec_flags  (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
/* If using bzImage for capture kernel, then we will not be
-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v4 4/6] kexec, x86: put ramdisk high for 64bit bzImage

2012-11-24 Thread Yinghai Lu
We could put ramdisk high for bzImage on 64bit for protocol 2.12.

-v2: change ext_... handling to way that eric like.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c   |2 ++
 kexec/arch/i386/x86-linux-setup.c |   18 +++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index d83f0a9..30c38e2 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -251,6 +251,8 @@ int do_bzImage_load(struct kexec_info *info,
memcpy(real_mode, kernel, kern16_size);
if (!real_mode_entry)
clean_boot_params((unsigned char *)real_mode, kern16_size);
+   /* disable loading above 4g */
+   real_mode-xloadflags = ~1;
 
if (info-kexec_flags  (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)) {
/* If using bzImage for capture kernel, then we will not be
diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index b7ab8ea..3c31f64 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -64,7 +64,11 @@ void setup_linux_bootloader_parameters(
/* Find the maximum initial ramdisk address */
initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
if (real_mode-protocol_version = 0x0203) {
-   initrd_addr_max = real_mode-initrd_addr_max;
+   if (real_mode-protocol_version = 0x020c 
+   real_mode-xloadflags  1)
+   initrd_addr_max = ULONG_MAX;
+   else
+   initrd_addr_max = real_mode-initrd_addr_max;
dbgprintf(initrd_addr_max is 0x%lx\n, initrd_addr_max);
}
 
@@ -81,8 +85,16 @@ void setup_linux_bootloader_parameters(
}
 
/* Ramdisk address and size */
-   real_mode-initrd_start = initrd_base;
-   real_mode-initrd_size  = initrd_size;
+   real_mode-initrd_start = initrd_base  0xUL;
+   real_mode-initrd_size  = initrd_size  0xUL;
+
+   if (real_mode-protocol_version = 0x020c 
+   (initrd_base  0xUL) != initrd_base)
+   real_mode-ext_ramdisk_image = initrd_base  32;
+
+   if (real_mode-protocol_version = 0x020c 
+   (initrd_size  0xUL) != initrd_size)
+   real_mode-ext_ramdisk_size = initrd_size  32;
 
/* The location of the command line */
/* if (real_mode_base == 0x9) { */
-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v4 5/6] kexec, x86: set ext_cmd_line_ptr when boot_param is above 4g

2012-11-24 Thread Yinghai Lu
Update ext_cmd_line_ptr for bzImage from protocol 2.12 onward,
which may have the command line above 4G.

-v2: update ext_... handling to the way that Eric likes.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/x86-linux-setup.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index 3c31f64..d12dab1 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -103,7 +103,12 @@ void setup_linux_bootloader_parameters(
/* setup_move_size */
/* } */
if (real_mode-protocol_version = 0x0202) {
-   real_mode-cmd_line_ptr = real_mode_base + cmdline_offset;
+   unsigned long cmd_line_ptr = real_mode_base + cmdline_offset;
+
+   real_mode-cmd_line_ptr = cmd_line_ptr  0xUL;
+   if ((real_mode-protocol_version = 0x020c) 
+   ((cmd_line_ptr  0xUL) != cmd_line_ptr))
+   real_mode-ext_cmd_line_ptr = cmd_line_ptr  32;
}
 
/* Fill in the command line */
-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v4 3/6] kexec, x86: add boot header member for version 2.12

2012-11-24 Thread Yinghai Lu
will use ext_ramdisk_image/size, and xloadflags to put
ramdisk and bzImage high for 64bit.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 include/x86/x86-linux.h |   32 +++-
 1 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/include/x86/x86-linux.h b/include/x86/x86-linux.h
index 27af02b..a2452f2 100644
--- a/include/x86/x86-linux.h
+++ b/include/x86/x86-linux.h
@@ -32,7 +32,7 @@ struct drive_info_struct {
 };
 struct sys_desc_table {
uint16_t length;
-   uint8_t  table[318];
+   uint8_t  table[30];
 };
 
 struct apm_bios_info {
@@ -112,6 +112,10 @@ struct x86_linux_param_header {
struct apm_bios_info apm_bios_info; /* 0x40 */
struct drive_info_struct drive_info;/* 0x80 */
struct sys_desc_table sys_desc_table;   /* 0xa0 */
+   uint32_t ext_ramdisk_image; /* 0xc0 */
+   uint32_t ext_ramdisk_size;  /* 0xc4 */
+   uint32_t ext_cmd_line_ptr;  /* 0xc8 */
+   uint8_t reserved4_1[0x1e0 - 0xcc];  /* 0xcc */
uint32_t alt_mem_k; /* 0x1e0 */
uint8_t  reserved5[4];  /* 0x1e4 */
uint8_t  e820_map_nr;   /* 0x1e8 */
@@ -174,11 +178,18 @@ struct x86_linux_param_header {
/* 2.04+ */
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved15[3]; /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
-   uint8_t  reserved16[0x290 - 0x248]; /* 0x248 */
+   uint32_t payload_offset;/* 0x248 */
+   uint32_t payload_length;/* 0x24C */
+   uint64_t setup_data;/* 0x250 */
+   uint64_t pref_address;  /* 0x258 */
+   uint32_t init_size; /* 0x260 */
+   uint32_t handover_offset;   /* 0x264 */
+   uint8_t  reserved16[0x290 - 0x268]; /* 0x268 */
uint32_t edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];   /* 0x290 */
 #endif
struct  e820entry e820_map[E820MAX];/* 0x2d0 */
@@ -195,7 +206,11 @@ struct x86_linux_faked_param_header {
 };
 
 struct x86_linux_header {
-   uint8_t  reserved1[0x1f1];  /* 0x000 */
+   uint8_t  reserved1[0xc0];   /* 0x000 */
+   uint32_t ext_ramdisk_image; /* 0x0c0 */
+   uint32_t ext_ramdisk_size;  /* 0x0c4 */
+   uint32_t ext_cmd_line_ptr;  /* 0x0c8 */
+   uint8_t  reserved1_1[0x1f1-0xcc];   /* 0x0cc */
uint8_t  setup_sects;   /* 0x1f1 */
uint16_t root_flags;/* 0x1f2 */
uint16_t syssize;   /* 0x1f4 */
@@ -241,10 +256,17 @@ struct x86_linux_header {
 #else
uint32_t kernel_alignment;  /* 0x230 */
uint8_t  relocatable_kernel;/* 0x234 */
-   uint8_t  reserved6[3];  /* 0x235 */
+   uint8_t  min_alignment; /* 0x235 */
+   uint16_t xloadflags;/* 0x236 */
uint32_t cmdline_size;  /* 0x238 */
uint32_t hardware_subarch;  /* 0x23C */
uint64_t hardware_subarch_data; /* 0x240 */
+   uint32_t payload_offset;/* 0x248 */
+   uint32_t payload_length;/* 0x24C */
+   uint64_t setup_data;/* 0x250 */
+   uint64_t pref_address;  /* 0x258 */
+   uint32_t init_size; /* 0x260 */
+   uint32_t handover_offset;   /* 0x264 */
 #endif
 } PACKED;
 
-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v4 2/6] kexec, x86: Fix bzImage real-mode booting

2012-11-24 Thread Yinghai Lu
We need to keep space for bss and heap/stack before the command line;
otherwise command_line will be cleared by the kernel's 16bit init code.

We also need to set the 32bit start address in the real_mode header, because
the kernel's 16bit code needs to jump there.

Also don't touch regs16 if --real-mode is not specified.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/kexec-bzImage.c |   63 +++---
 1 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/kexec/arch/i386/kexec-bzImage.c b/kexec/arch/i386/kexec-bzImage.c
index 80d09e7..d83f0a9 100644
--- a/kexec/arch/i386/kexec-bzImage.c
+++ b/kexec/arch/i386/kexec-bzImage.c
@@ -130,6 +130,8 @@ int do_bzImage_load(struct kexec_info *info,
unsigned long kernel32_load_addr;
char *modified_cmdline;
unsigned long cmdline_end;
+   unsigned long kern16_size_needed;
+   unsigned long heap_size = 0;
 
/*
 * Find out about the file I am about to load.
@@ -221,9 +223,31 @@ int do_bzImage_load(struct kexec_info *info,
elf_rel_build_load(info, info-rhdr, purgatory, purgatory_size,
0x3000, 640*1024, -1, 0);
dbgprintf(Loaded purgatory at addr 0x%lx\n, info-rhdr.rel_addr);
+
/* The argument/parameter segment */
-   setup_size = kern16_size + command_line_len + PURGATORY_CMDLINE_SIZE;
+   if (real_mode_entry) {
+   /* need to include size for bss and heap etc */
+   if (setup_header.protocol_version = 0x0201)
+   kern16_size_needed = setup_header.heap_end_ptr;
+   else
+   kern16_size_needed = kern16_size + 8192; /* bss */
+   if (kern16_size_needed  kern16_size)
+   kern16_size_needed = kern16_size;
+   if (kern16_size_needed  0xfffc)
+   die(kern16_size_needed is more then 64k\n);
+   heap_size = 0xfffc - kern16_size_needed; /* less 64k */
+   heap_size = ~(0x200 - 1);
+   kern16_size_needed += heap_size;
+   } else {
+   kern16_size_needed = kern16_size;
+   /* need to bigger than size of struct bootparams */
+   if (kern16_size_needed  4096)
+   kern16_size_needed = 4096;
+   }
+   setup_size = kern16_size_needed + command_line_len +
+PURGATORY_CMDLINE_SIZE;
real_mode = xmalloc(setup_size);
+   memset(real_mode, 0, setup_size);
memcpy(real_mode, kernel, kern16_size);
if (!real_mode_entry)
clean_boot_params((unsigned char *)real_mode, kern16_size);
@@ -280,11 +304,20 @@ int do_bzImage_load(struct kexec_info *info,
 
/* Tell the kernel what is going on */
setup_linux_bootloader_parameters(info, real_mode, setup_base,
-   kern16_size, command_line, command_line_len,
+   kern16_size_needed, command_line, command_line_len,
initrd, initrd_len);
 
+   if (real_mode_entry) {
+   /* restore can use heap and load high */
+   real_mode-loader_flags = 0x81;
+   if (real_mode-protocol_version = 0x0201)
+   real_mode-heap_end_ptr += heap_size - 0x200; /*stack*/
+   }
+
/* Get the initial register values */
-   elf_rel_get_symbol(info-rhdr, entry16_regs, regs16, 
sizeof(regs16));
+   if (real_mode_entry)
+   elf_rel_get_symbol(info-rhdr, entry16_regs,
+regs16, sizeof(regs16));
elf_rel_get_symbol(info-rhdr, entry32_regs, regs32, 
sizeof(regs32));
/*
 
@@ -303,16 +336,18 @@ int do_bzImage_load(struct kexec_info *info,
/*
 * Initialize the 16bit start information.
 */
-   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base  4;
-   regs16.cs = regs16.ds + 0x20;
-   regs16.ip = 0;
-   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
-   regs16.ss = (elf_rel_get_addr(info-rhdr, stack_end) - 64*1024)  4;
-   /* XXX: Documentation/i386/boot.txt says 'sp' must equal heap_end */
-   regs16.esp = 0xFFFC;
if (real_mode_entry) {
+   regs16.ds = regs16.es = regs16.fs = regs16.gs = setup_base  4;
+   regs16.cs = regs16.ds + 0x20;
+   regs16.ip = 0;
+   /* XXX: Documentation/i386/boot.txt says 'ss' must equal 'ds' */
+   regs16.ss = (elf_rel_get_addr(info-rhdr, stack_end) - 
64*1024)  4;
+   /* XXX: Documentation/i386/boot.txt says 'sp' must equal 
heap_end */
+   regs16.esp = 0xFFFC;
+
printf(Starting the kernel in real mode\n);
regs32.eip = elf_rel_get_addr(info-rhdr, entry16);
+   real_mode-kernel_start = kernel32_load_addr;
}
if (real_mode_entry  kexec_debug) {
unsigned long entry16_debug, pre32, first32

Re: [PATCH v3 4/4] kexec, x86_64: Load bzImage64 above 4G

2012-11-21 Thread Yinghai Lu
On Wed, Nov 21, 2012 at 6:50 AM, Vivek Goyal vgo...@redhat.com wrote:
 On Tue, Nov 20, 2012 at 11:31:38PM -0800, Yinghai Lu wrote:

 [..]
 +int bzImage64_probe(const char *buf, off_t len)
 +{
 + const struct x86_linux_header *header;
 + if ((uintmax_t)len  (uintmax_t)(2 * 512)) {
 + if (probe_debug) {
 + fprintf(stderr, File is too short to be a 
 bzImage!\n);
 + }
 + return -1;
 + }
 + header = (const struct x86_linux_header *)buf;
 + if (memcmp(header-header_magic, HdrS, 4) != 0) {
 + if (probe_debug) {
 + fprintf(stderr, Not a bzImage\n);
 + }
 + return -1;
 + }
 + if (header-boot_sector_magic != 0xAA55) {
 + if (probe_debug) {
 + fprintf(stderr, No x86 boot sector present\n);
 + }
 + /* No x86 boot sector present */
 + return -1;
 + }
 + if (header-protocol_version  0x020C) {
 + if (probe_debug) {
 + fprintf(stderr, Must be at least protocol version 
 2.12\n);
 + }
 + /* Must be at least protocol version 2.12 */
 + return -1;
 + }
 + if ((header-loadflags  1) == 0) {
 + if (probe_debug) {
 + fprintf(stderr, zImage not a bzImage\n);
 + }
 + /* Not a bzImage */
 + return -1;
 + }
 + if (!(header-xloadflags  1)) {
 + if (probe_debug) {
 + fprintf(stderr, Not a bzImage64\n);
 + }
 + /* Must be LOADED_ABOVE_4G */
 + return -1;
 + }

 So how do I force a 16bit or 32bit entry using a bzImage64?

kexec -t bzImage -l 
will load low and use 32bit entry.

kexec -t bzImage64 -l ...
kexec -l ...
will try to load high and use 64bit entry.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v3 4/4] kexec, x86_64: Load bzImage64 above 4G

2012-11-21 Thread Yinghai Lu
On Wed, Nov 21, 2012 at 11:52 AM, H. Peter Anvin h...@zytor.com wrote:
 On 11/21/2012 11:50 AM, Yinghai Lu wrote:

 So how do I force a 16bit or 32bit entry using a bzImage64?

 kexec -t bzImage -l 
 will load low and use 32bit entry.

 kexec -t bzImage64 -l ...
 kexec -l ...
 will try to load high and use 64bit entry.



 I don't see any difference...



--- a/kexec/arch/x86_64/kexec-x86_64.c
+++ b/kexec/arch/x86_64/kexec-x86_64.c
@@ -37,6 +37,7 @@ struct file_type file_type[] = {
{ multiboot-x86, multiboot_x86_probe, multiboot_x86_load,
  multiboot_x86_usage },
{ elf-x86, elf_x86_probe, elf_x86_load, elf_x86_usage },
+   { bzImage64, bzImage64_probe, bzImage64_load, bzImage64_usage },
{ bzImage, bzImage_probe, bzImage_load, bzImage_usage },
{ beoboot-x86, beoboot_probe, beoboot_load, beoboot_usage },
{ nbi-x86, nbi_probe, nbi_load, nbi_usage },

bzImage64_probe will be run before bzImage_probe.

If it finds that the image is 64bit, bzImage_probe will not be executed.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v3 4/4] kexec, x86_64: Load bzImage64 above 4G

2012-11-21 Thread Yinghai Lu
On Wed, Nov 21, 2012 at 11:56 AM, H. Peter Anvin h...@zytor.com wrote:
 On 11/21/2012 11:54 AM, Yinghai Lu wrote:

 in kernel arch/x86/kernel/head_64.S

 it only set first 1G ident mapping. and if it find that code is above
 1G, it will set extra ident mapping
 for new _text.._end.
 To make checking and add extra mapping simple and also save two extra
 pages for mapping.
 Limit that _text.._end in them same GB range.


 No, this is backwards.

old one: it limited bzImage in [0,1G) aka the first 1G.

Now we can put it in any aligned 1G range.

So how could it be called backwards?


 We should fix that limitation instead.

sure, but that will make arch/x86/boot/compressed/head_64.S not need
complicated.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v3 4/4] kexec, x86_64: Load bzImage64 above 4G

2012-11-21 Thread Yinghai Lu
On Wed, Nov 21, 2012 at 12:00 PM, Vivek Goyal vgo...@redhat.com wrote:
 On Wed, Nov 21, 2012 at 11:50:56AM -0800, Yinghai Lu wrote:

 [..]
  So how do I force a 16bit or 32bit entry using a bzImage64?

 kexec -t bzImage -l 
 will load low and use 32bit entry.


 Ok, so user needs to enforce image type (using -t option) to bzImage
 (for a bzImage which supports 64bit entry point) to be able to use
 32bit entry.

 I think a better option is that bzImage64 loader parses the user specified
 entry options and call into 32bit bzImage loader if user is asking for 32bit
 or 16bit entry. For 16bit, we already have option --real-mode option. May
 be we need to introduce another one for forcing 32bit entry, say --entry-32bit
 or --protected-mode (whatever makes sense).

that is doable.

Add checks for --entry-32bit and --real-mode in kexec-bzImage64.c
and bail out early, leaving kexec-bzImage.c to take over.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v3 4/4] kexec, x86_64: Load bzImage64 above 4G

2012-11-21 Thread Yinghai Lu
On Wed, Nov 21, 2012 at 12:12 PM, Vivek Goyal vgo...@redhat.com wrote:

 Or actually can we do reverse. Do not introduce new image format
 bzImage64. In existing bzImage loader if bzImage version is greater
 than 0x020c (or whatever version has 64bit entry extension), just
 call into load_bzImage64().

Eric wanted me to separate that out.
and you want me to put them back?

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v3 4/4] kexec, x86_64: Load bzImage64 above 4G

2012-11-21 Thread Yinghai Lu
On Wed, Nov 21, 2012 at 12:16 PM, H. Peter Anvin h...@zytor.com wrote:
 On 11/21/2012 12:01 PM, Yinghai Lu wrote:
 On Wed, Nov 21, 2012 at 11:56 AM, H. Peter Anvin h...@zytor.com wrote:
 On 11/21/2012 11:54 AM, Yinghai Lu wrote:

 in kernel arch/x86/kernel/head_64.S

 it only set first 1G ident mapping. and if it find that code is above
 1G, it will set extra ident mapping
 for new _text.._end.
 To make checking and add extra mapping simple and also save two extra
 pages for mapping.
 Limit that _text.._end in them same GB range.


 No, this is backwards.

 old one: it limited bzImage in [0,1G) aka the first 1G.

 Now we can put it in any aligned 1G range.

 So how could it be called backwards?


 Because you're adding a more complicated hack.

not that complicated, and it only add 7 lines

/* same GB ? */
while ((addr  30) != ((addr + size - 1)  30)) {
addr = locate_hole(info, size, align, 0x10,
 round_down(addr + size - 1, (1UL30)), -1);
if (addr == ULONG_MAX)
die(can not load bzImage64);
}



 We should fix that limitation instead.

 sure, but that will make arch/x86/boot/compressed/head_64.S not need
 complicated.

Adding handling for crossing a GB boundary to head_64.S may need
another 100 lines of code
for the checks, etc.

It will also need another two spare pages for crossing a 512G boundary.


 But it makes the bootloaders more complicated, and the bootloaders are
 harder to fix.

Current bootloaders do not have this feature and would need to
add it, and it looks like kexec is the only one at this point.

BTW,  is there any 64bit boot loader?

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 2/4] kexec, x86: put ramdisk high for 64bit bzImage

2012-11-20 Thread Yinghai Lu
We could put ramdisk high for bzImage on 64bit for protocol 2.12.

-v2: change ext_... handling to way that eric like.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/i386/x86-linux-setup.c |   18 +++---
 1 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/kexec/arch/i386/x86-linux-setup.c 
b/kexec/arch/i386/x86-linux-setup.c
index b7ab8ea..3c31f64 100644
--- a/kexec/arch/i386/x86-linux-setup.c
+++ b/kexec/arch/i386/x86-linux-setup.c
@@ -64,7 +64,11 @@ void setup_linux_bootloader_parameters(
/* Find the maximum initial ramdisk address */
initrd_addr_max = DEFAULT_INITRD_ADDR_MAX;
if (real_mode-protocol_version = 0x0203) {
-   initrd_addr_max = real_mode-initrd_addr_max;
+   if (real_mode-protocol_version = 0x020c 
+   real_mode-xloadflags  1)
+   initrd_addr_max = ULONG_MAX;
+   else
+   initrd_addr_max = real_mode-initrd_addr_max;
dbgprintf(initrd_addr_max is 0x%lx\n, initrd_addr_max);
}
 
@@ -81,8 +85,16 @@ void setup_linux_bootloader_parameters(
}
 
/* Ramdisk address and size */
-   real_mode-initrd_start = initrd_base;
-   real_mode-initrd_size  = initrd_size;
+   real_mode-initrd_start = initrd_base  0xUL;
+   real_mode-initrd_size  = initrd_size  0xUL;
+
+   if (real_mode-protocol_version = 0x020c 
+   (initrd_base  0xUL) != initrd_base)
+   real_mode-ext_ramdisk_image = initrd_base  32;
+
+   if (real_mode-protocol_version = 0x020c 
+   (initrd_size  0xUL) != initrd_size)
+   real_mode-ext_ramdisk_size = initrd_size  32;
 
/* The location of the command line */
/* if (real_mode_base == 0x9) { */
-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 0/4] kexec: put bzImage and ramdisk above 4G for x86 64bit

2012-11-20 Thread Yinghai Lu
Currently the kdump reserved region is limited to below 896M, because kexec has that limitation,
and the bzImage also needs to stay under 4G.

kernel parts changes could be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git 
for-x86-boot

These patches are for kexec-tools to load bzImage and ramdisk high, according
to the newly added boot header fields.

-v3: address review from Eric to use locate_hole at first.
 use xloadflags instead.

Yinghai Lu (4):
  kexec, x86: add boot header member for version 2.12
  kexec, x86: put ramdisk high for 64bit bzImage
  kexec, x86: set ext_cmd_line_ptr when boot_param is above 4g
  kexec, x86_64: Load bzImage64 above 4G

 include/x86/x86-linux.h |   26 +++-
 kexec/arch/i386/x86-linux-setup.c   |   25 +++-
 kexec/arch/x86_64/Makefile  |1 +
 kexec/arch/x86_64/kexec-bzImage64.c |  327 +++
 kexec/arch/x86_64/kexec-x86_64.c|1 +
 kexec/arch/x86_64/kexec-x86_64.h|5 +
 6 files changed, 378 insertions(+), 7 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

-- 
1.7.7


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v3 4/4] kexec, x86_64: Load bzImage64 above 4G

2012-11-20 Thread Yinghai Lu
Need to check xloadflags to see whether the bzImage is 64bit relocatable.

-v2: add kexec-bzImage64.c according to Eric.
-v3: don't need to purgatory under 2g after Eric's change to purgatory code.
-v4: use locate_hole find position first then add_buffer... suggested by Eric
 add buffer for kernel image at last to make kexec-load faster.
 use xloadflags in setup_header to tell if is bzImage64.

Signed-off-by: Yinghai Lu ying...@kernel.org
---
 kexec/arch/x86_64/Makefile  |1 +
 kexec/arch/x86_64/kexec-bzImage64.c |  327 +++
 kexec/arch/x86_64/kexec-x86_64.c|1 +
 kexec/arch/x86_64/kexec-x86_64.h|5 +
 4 files changed, 334 insertions(+), 0 deletions(-)
 create mode 100644 kexec/arch/x86_64/kexec-bzImage64.c

diff --git a/kexec/arch/x86_64/Makefile b/kexec/arch/x86_64/Makefile
index 405bdf5..1cf10f9 100644
--- a/kexec/arch/x86_64/Makefile
+++ b/kexec/arch/x86_64/Makefile
@@ -13,6 +13,7 @@ x86_64_KEXEC_SRCS += kexec/arch/i386/crashdump-x86.c
 x86_64_KEXEC_SRCS_native =  kexec/arch/x86_64/kexec-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-x86_64.c
 x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-elf-rel-x86_64.c
+x86_64_KEXEC_SRCS_native += kexec/arch/x86_64/kexec-bzImage64.c
 
 x86_64_KEXEC_SRCS += $(x86_64_KEXEC_SRCS_native)
 
diff --git a/kexec/arch/x86_64/kexec-bzImage64.c b/kexec/arch/x86_64/kexec-bzImage64.c
new file mode 100644
index 0000000..28f1ace
--- /dev/null
+++ b/kexec/arch/x86_64/kexec-bzImage64.c
@@ -0,0 +1,327 @@
+/*
+ * kexec: Linux boots Linux
+ *
+ * Copyright (C) 2003-2010  Eric Biederman (ebied...@xmission.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define _GNU_SOURCE
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <elf.h>
+#include <boot/elf_boot.h>
+#include <ip_checksum.h>
+#include <x86/x86-linux.h>
+#include "../../kexec.h"
+#include "../../kexec-elf.h"
+#include "../../kexec-syscall.h"
+#include "kexec-x86_64.h"
+#include "../i386/x86-linux-setup.h"
+#include "../i386/crashdump-x86.h"
+#include <arch/options.h>
+
+static const int probe_debug = 0;
+
+int bzImage64_probe(const char *buf, off_t len)
+{
+	const struct x86_linux_header *header;
+	if ((uintmax_t)len < (uintmax_t)(2 * 512)) {
+		if (probe_debug) {
+			fprintf(stderr, "File is too short to be a bzImage!\n");
+		}
+		return -1;
+	}
+	header = (const struct x86_linux_header *)buf;
+	if (memcmp(header->header_magic, "HdrS", 4) != 0) {
+		if (probe_debug) {
+			fprintf(stderr, "Not a bzImage\n");
+		}
+		return -1;
+	}
+	if (header->boot_sector_magic != 0xAA55) {
+		if (probe_debug) {
+			fprintf(stderr, "No x86 boot sector present\n");
+		}
+		/* No x86 boot sector present */
+		return -1;
+	}
+	if (header->protocol_version < 0x020C) {
+		if (probe_debug) {
+			fprintf(stderr, "Must be at least protocol version 2.12\n");
+		}
+		/* Must be at least protocol version 2.12 */
+		return -1;
+	}
+	if ((header->loadflags & 1) == 0) {
+		if (probe_debug) {
+			fprintf(stderr, "zImage not a bzImage\n");
+		}
+		/* Not a bzImage */
+		return -1;
+	}
+	if (!(header->xloadflags & 1)) {
+		if (probe_debug) {
+			fprintf(stderr, "Not a bzImage64\n");
+		}
+		/* Must be LOADED_ABOVE_4G */
+		return -1;
+	}
+	/* I've got a bzImage64 */
+	if (probe_debug) {
+		fprintf(stderr, "It's a bzImage64\n");
+	}
+	return 0;
+}
+
+void bzImage64_usage(void)
+{
+   printf( --command-line=STRING Set the kernel command line to 
STRING.\n
+   --append=STRING   Set the kernel command line to 
STRING.\n
+   --reuse-cmdline   Use kernel command line from running 
system.\n
+   --initrd=FILE Use FILE as the kernel's initial 
ramdisk.\n

  1   2   >