On Tue, Apr 21, 2009 at 11:49:55AM +0900, Isaku Yamahata wrote:
> On Tue, Apr 21, 2009 at 11:19:06AM +0900, Isaku Yamahata wrote:
> > On Tue, Apr 21, 2009 at 10:27:02AM +0900, Akio Takebe wrote:
> > > Hi,
> > > 
> > > The following changeset broke booting xen-ia64 on some kinds of ia64 
> > > boxes.
> > > http://xenbits.xensource.com/ext/ia64/xen-unstable.hg/rev/3fd8f9b34941
> > > 
> > > The tasklet_schedule() call ends up in raise_softirq().
> > > Because raise_softirq() uses per-CPU data, accessing it before
> > > cpu_init() leads to unexpected behavior.
> > > 
> > > I made the following patch while investigating this.
> > > With it, xen-ia64 boots on those ia64 boxes.
> > > I'm not sure why Tiger4 can boot the latest xen-ia64.
> > > I didn't find a clean solution; what do you think about it?
> > 
> > Unfortunately, it happened to boot on my Tiger4, so I pushed out
> > the changeset.
> > I understand the issue now. Looking at the boot sequence, it seems
> > somewhat difficult to move init_console() down after cpu_init()
> > and to remove all the printk() calls before cpu_init().
> > Hmm, this needs some consideration.
> > 
> > BTW, was there a similar issue in the ia64 Linux case before?
> 
> Yes, there was.
> commit 10617bbe84628eb18ab5f723d3ba35005adde143
> and
> commit c459ce8b5a7d933a3bcf6915ab17ac1e036e2ac4
> 
> The related mail threads were
> Jul 14 Christian Kande Initialization order problem
> and
> Aug 07 Luck, Tony      [RFC] Fix early access to per-cpu variables

I created the patch following the ia64 Linux approach.
Could you try it?
It is not intrusive to the common code, and it should prevent
similar breakage in the future.

[IA64] fix early access to per cpu area.

The following changeset broke booting xen-ia64 on some kinds of ia64 boxes.
http://xenbits.xensource.com/ext/ia64/xen-unstable.hg/rev/3fd8f9b34941

tasklet_schedule() calls raise_softirq().
Because raise_softirq() uses per-CPU data, accessing it before cpu_init()
leads to unexpected behavior.

There was a similar issue on Linux/ia64.
The following changesets resolved it:
        10617bbe84628eb18ab5f723d3ba35005adde143
        c459ce8b5a7d933a3bcf6915ab17ac1e036e2ac4

This patch fixes the issue following the Linux/ia64 solution:
allocate the per-cpu area for cpu0 in the .data section and initialize
it early.
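As a readable restatement of the new boot-CPU sequence (only a sketch: the
real code must stay in assembly because it runs in head.S before the register
stack and the C environment are set up, the helper name below is made up, and
the PERCPU_PAGE_SIZE fallback is illustrative):

#ifndef PERCPU_PAGE_SIZE
#define PERCPU_PAGE_SIZE (1UL << 16)	/* illustrative; real value comes from asm/page.h */
#endif

extern char __per_cpu_start[], __per_cpu_end[];
extern char __phys_per_cpu_start[];
extern char __cpu0_per_cpu[];		/* PERCPU_PAGE_SIZE bytes reserved in .data by xen.lds.S */
extern unsigned long __per_cpu_offset[];

/* Copy the initial per-CPU data image into the static cpu0 area and
 * record cpu0's offset, so per_cpu() works from the very first C code
 * onwards (early printk/tasklet paths included). */
static void early_cpu0_per_cpu_setup(void)
{
	unsigned long n;

	for (n = 0; n < PERCPU_PAGE_SIZE; n += 8)	/* head.S copies 8 bytes at a time */
		*(unsigned long *)(__cpu0_per_cpu + n) =
			*(unsigned long *)(__phys_per_cpu_start + n);

	__per_cpu_offset[0] = __cpu0_per_cpu - __per_cpu_start;
}

per_cpu_init() later recomputes __per_cpu_offset[0] from __cpu0_per_cpu in the
same way and uses the dynamically allocated area only for the remaining CPUs,
which is why per_cpu_allocate() now asks for (NR_CPUS - 1) * PERCPU_PAGE_SIZE.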

Reported-by: Akio Takebe <takebe_a...@jp.fujitsu.com>
Signed-off-by: Isaku Yamahata <yamah...@valinux.co.jp>

diff --git a/xen/arch/ia64/linux-xen/head.S b/xen/arch/ia64/linux-xen/head.S
--- a/xen/arch/ia64/linux-xen/head.S
+++ b/xen/arch/ia64/linux-xen/head.S
@@ -382,6 +382,35 @@ 1: // now we are in virtual mode
        mov ar.rsc=0            // place RSE in enforced lazy mode
        ;;
        loadrs                  // clear the dirty partition
+#ifdef XEN
+(isAP) br.few 2f
+       movl r19=__phys_per_cpu_start
+       mov r18=PERCPU_PAGE_SIZE
+#ifndef CONFIG_SMP
+       add r19=r19,r18
+       ;;
+#else
+       movl r20=__cpu0_per_cpu
+       ;;
+       shr.u r18=r18,3
+1:
+       ld8 r21=[r19],8 ;;
+       st8[r20]=r21,8
+       adds r18=-1,r18
+       ;;
+       cmp4.lt p7,p6=0,r18
+(p7)   br.cond.dptk.few 1b
+       ;;
+#endif
+       movl r18=__per_cpu_offset
+       movl r19=__cpu0_per_cpu
+       movl r20=__per_cpu_start
+       ;;
+       sub r20=r19,r20
+       ;;
+       st8 [r18]=r20
+2:
+#endif
        ;;
        mov ar.bspstore=r2      // establish the new RSE stack
        ;;
diff --git a/xen/arch/ia64/linux-xen/mm_contig.c b/xen/arch/ia64/linux-xen/mm_contig.c
--- a/xen/arch/ia64/linux-xen/mm_contig.c
+++ b/xen/arch/ia64/linux-xen/mm_contig.c
@@ -183,7 +183,7 @@ void *percpu_area __initdata = NULL;
 void* __init
 per_cpu_allocate(void *xen_heap_start, unsigned long end_in_pa)
 {
-       int order = get_order(NR_CPUS * PERCPU_PAGE_SIZE);
+       int order = get_order((NR_CPUS - 1) * PERCPU_PAGE_SIZE);
        unsigned long size = 1UL << (order + PAGE_SHIFT);
        unsigned long start = ALIGN_UP((unsigned long)xen_heap_start,
                                       PERCPU_PAGE_SIZE);
@@ -226,19 +226,31 @@ per_cpu_init (void)
         */
        if (smp_processor_id() == 0) {
 #ifdef XEN
+               void *cpu0_data = __cpu0_per_cpu;
+
+               __per_cpu_offset[0] = (char *)cpu0_data - __per_cpu_start;
+               per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0];
+
                cpu_data = get_per_cpu_area();
                if (cpu_data == NULL) 
                        panic("can't allocate per cpu area.\n");
+
+               for (cpu = 1; cpu < NR_CPUS; cpu++) {
+                       memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
+                       __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
+                       cpu_data += PERCPU_PAGE_SIZE;
+                       per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+               }
 #else
                cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
                                           PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
-#endif
                for (cpu = 0; cpu < NR_CPUS; cpu++) {
                        memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
                        __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
                        cpu_data += PERCPU_PAGE_SIZE;
                        per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
                }
+#endif
        }
        return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
 }
diff --git a/xen/arch/ia64/linux-xen/smpboot.c b/xen/arch/ia64/linux-xen/smpboot.c
--- a/xen/arch/ia64/linux-xen/smpboot.c
+++ b/xen/arch/ia64/linux-xen/smpboot.c
@@ -449,8 +449,8 @@ start_secondary (void *unused)
 {
        /* Early console may use I/O ports */
        ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
+#ifndef XEN
        Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
-#ifndef XEN
        efi_map_pal_code();
 #endif
        cpu_init();
diff --git a/xen/arch/ia64/xen/xen.lds.S b/xen/arch/ia64/xen/xen.lds.S
--- a/xen/arch/ia64/xen/xen.lds.S
+++ b/xen/arch/ia64/xen/xen.lds.S
@@ -195,7 +195,17 @@ SECTIONS
 
   data : { } :data
   .data : AT(ADDR(.data) - LOAD_OFFSET)
-       { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
+       {
+#ifdef CONFIG_SMP
+  . = ALIGN(PERCPU_PAGE_SIZE);
+               __cpu0_per_cpu = .;
+ . = . + PERCPU_PAGE_SIZE;    /* cpu0 per-cpu space */
+#endif
+               *(.data)
+               *(.data1)
+               *(.gnu.linkonce.d*)
+               CONSTRUCTORS
+       }
 
   . = ALIGN(16);       /* gp must be 16-byte aligned for exc. table */
   .got : AT(ADDR(.got) - LOAD_OFFSET)
diff --git a/xen/include/asm-ia64/linux-xen/asm/README.origin b/xen/include/asm-ia64/linux-xen/asm/README.origin
--- a/xen/include/asm-ia64/linux-xen/asm/README.origin
+++ b/xen/include/asm-ia64/linux-xen/asm/README.origin
@@ -22,6 +22,7 @@ pgtable.h             -> linux/include/asm-ia64/pgt
 processor.h            -> linux/include/asm-ia64/processor.h
 ptrace.h               -> linux/include/asm-ia64/ptrace.h
 sal.h                  -> linux/include/asm-ia64/sal.h
+sections.h             -> linux/include/asm-ia64/sections.h
 smp.h                  -> linux/include/asm-ia64/smp.h
 spinlock.h             -> linux/include/asm-ia64/spinlock.h
 system.h               -> linux/include/asm-ia64/system.h
diff --git a/xen/include/asm-ia64/linux/asm/sections.h b/xen/include/asm-ia64/linux-xen/asm/sections.h
rename from xen/include/asm-ia64/linux/asm/sections.h
rename to xen/include/asm-ia64/linux-xen/asm/sections.h
--- a/xen/include/asm-ia64/linux/asm/sections.h
+++ b/xen/include/asm-ia64/linux-xen/asm/sections.h
@@ -9,6 +9,9 @@
 #include <asm-generic/sections.h>
 
 extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
+#ifdef CONFIG_SMP
+extern char __cpu0_per_cpu[];
+#endif
 extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
 extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
 extern char __start_gate_section[];
diff --git a/xen/include/asm-ia64/linux/asm/README.origin b/xen/include/asm-ia64/linux/asm/README.origin
--- a/xen/include/asm-ia64/linux/asm/README.origin
+++ b/xen/include/asm-ia64/linux/asm/README.origin
@@ -29,7 +29,6 @@ param.h                       -> linux/include/asm-ia64/para
 patch.h                        -> linux/include/asm-ia64/patch.h
 pci.h                  -> linux/include/asm-ia64/pci.h
 rse.h                  -> linux/include/asm-ia64/rse.h
-sections.h             -> linux/include/asm-ia64/sections.h
 setup.h                        -> linux/include/asm-ia64/setup.h
 string.h               -> linux/include/asm-ia64/string.h
 thread_info.h          -> linux/include/asm-ia64/thread_info.h

-- 
yamahata
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@lists.xensource.com
http://lists.xensource.com/xen-ia64-devel
