From: David Feng <feng...@phytium.com.cn>

Relocation code based on a patch by Scott Wood, which is:
Signed-off-by: Scott Wood <scottw...@freescale.com>

Signed-off-by: David Feng <feng...@phytium.com.cn>
---
 arch/arm/config.mk                      |    3 +-
 arch/arm/cpu/armv8/Makefile             |   38 +++++
 arch/arm/cpu/armv8/cache.S              |  130 +++++++++++++++++
 arch/arm/cpu/armv8/cache_v8.c           |  218 ++++++++++++++++++++++++++++
 arch/arm/cpu/armv8/config.mk            |   15 ++
 arch/arm/cpu/armv8/cpu.c                |   67 +++++++++
 arch/arm/cpu/armv8/exceptions.S         |  112 +++++++++++++++
 arch/arm/cpu/armv8/start.S              |  234 +++++++++++++++++++++++++++++++
 arch/arm/cpu/armv8/timer.c              |   80 +++++++++++
 arch/arm/cpu/armv8/tlb.S                |   30 ++++
 arch/arm/cpu/armv8/u-boot.lds           |   89 ++++++++++++
 arch/arm/include/asm/armv8/mmu.h        |  110 +++++++++++++++
 arch/arm/include/asm/byteorder.h        |   12 ++
 arch/arm/include/asm/cache.h            |    5 +
 arch/arm/include/asm/config.h           |    6 +
 arch/arm/include/asm/global_data.h      |    6 +-
 arch/arm/include/asm/io.h               |   15 +-
 arch/arm/include/asm/macro.h            |   36 +++++
 arch/arm/include/asm/posix_types.h      |   10 ++
 arch/arm/include/asm/proc-armv/ptrace.h |   21 +++
 arch/arm/include/asm/proc-armv/system.h |   59 +++++++-
 arch/arm/include/asm/system.h           |   77 ++++++++++
 arch/arm/include/asm/types.h            |    4 +
 arch/arm/include/asm/u-boot.h           |    4 +
 arch/arm/include/asm/unaligned.h        |    2 +-
 arch/arm/lib/Makefile                   |   14 ++
 arch/arm/lib/board.c                    |    7 +-
 arch/arm/lib/bootm.c                    |   16 +++
 arch/arm/lib/crt0_64.S                  |  113 +++++++++++++++
 arch/arm/lib/interrupts_64.c            |  120 ++++++++++++++++
 arch/arm/lib/relocate_64.S              |   58 ++++++++
 common/image.c                          |    1 +
 doc/README.arm64                        |   45 ++++++
 examples/standalone/stubs.c             |   15 ++
 include/image.h                         |    1 +
 35 files changed, 1762 insertions(+), 11 deletions(-)
 create mode 100644 arch/arm/cpu/armv8/Makefile
 create mode 100644 arch/arm/cpu/armv8/cache.S
 create mode 100644 arch/arm/cpu/armv8/cache_v8.c
 create mode 100644 arch/arm/cpu/armv8/config.mk
 create mode 100644 arch/arm/cpu/armv8/cpu.c
 create mode 100644 arch/arm/cpu/armv8/exceptions.S
 create mode 100644 arch/arm/cpu/armv8/start.S
 create mode 100644 arch/arm/cpu/armv8/timer.c
 create mode 100644 arch/arm/cpu/armv8/tlb.S
 create mode 100644 arch/arm/cpu/armv8/u-boot.lds
 create mode 100644 arch/arm/include/asm/armv8/mmu.h
 create mode 100644 arch/arm/lib/crt0_64.S
 create mode 100644 arch/arm/lib/interrupts_64.c
 create mode 100644 arch/arm/lib/relocate_64.S
 create mode 100644 doc/README.arm64

diff --git a/arch/arm/config.mk b/arch/arm/config.mk
index d0cf43f..a259193 100644
--- a/arch/arm/config.mk
+++ b/arch/arm/config.mk
@@ -17,7 +17,8 @@ endif
 
 LDFLAGS_FINAL += --gc-sections
 PLATFORM_RELFLAGS += -ffunction-sections -fdata-sections \
-                     -fno-common -ffixed-r9 -msoft-float
+                     -fno-common -ffixed-r9
+PLATFORM_RELFLAGS += $(call cc-option, -msoft-float)
 
 # Support generic board on ARM
 __HAVE_ARCH_GENERIC_BOARD := y
diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
new file mode 100644
index 0000000..b216f27
--- /dev/null
+++ b/arch/arm/cpu/armv8/Makefile
@@ -0,0 +1,38 @@
+#
+# (C) Copyright 2000-2003
+# Wolfgang Denk, DENX Software Engineering, w...@denx.de.
+#
+# SPDX-License-Identifier:     GPL-2.0+
+#
+
+include $(TOPDIR)/config.mk
+
+LIB    = $(obj)lib$(CPU).o
+
+START  := start.o
+
+COBJS  += cpu.o
+COBJS  += timer.o
+COBJS  += cache_v8.o
+
+SOBJS  += exceptions.o
+SOBJS  += cache.o
+SOBJS  += tlb.o
+
+SRCS   := $(START:.o=.S) $(SOBJS:.o=.S) $(COBJS:.o=.c)
+OBJS   := $(addprefix $(obj),$(COBJS) $(SOBJS))
+START  := $(addprefix $(obj),$(START))
+
+all:   $(obj).depend $(START) $(LIB)
+
+$(LIB):        $(OBJS)
+       $(call cmd_link_o_target, $(OBJS))
+
+#########################################################################
+
+# defines $(obj).depend target
+include $(SRCTREE)/rules.mk
+
+sinclude $(obj).depend
+
+#########################################################################
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
new file mode 100644
index 0000000..419f169
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache.S
@@ -0,0 +1,130 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * void __asm_flush_dcache_level(level)
+ *
+ * clean and invalidate one cache level by set/way (DC CISW).
+ * Geometry (line size, #ways, #sets) is read from CCSIDR_EL1 for the level.
+ * x0: cache level (0 based, preserved)
+ * x1~x9: clobbered
+ */
+ENTRY(__asm_flush_dcache_level)
+       lsl     x1, x0, #1              /* x1 <- level << 1 (CSSELR/CISW format) */
+       msr     csselr_el1, x1          /* select cache level */
+       isb                             /* isb to sync the new csselr & ccsidr */
+       mrs     x6, ccsidr_el1          /* read the new ccsidr */
+       and     x2, x6, #7              /* x2 <- log2(line length) - 4 */
+       add     x2, x2, #4              /* x2 <- log2(line length in bytes) */
+       mov     x3, #0x3ff
+       and     x3, x3, x6, lsr #3      /* x3 <- number of ways - 1 */
+       clz     w5, w3                  /* x5 <- bit position of way field */
+       mov     x4, #0x7fff
+       and     x4, x4, x6, lsr #13     /* x4 <- number of sets - 1 (from ccsidr) */
+       /* x1 <- cache level << 1 */
+       /* x2 <- line length offset */
+       /* x3 <- number of cache ways - 1 */
+       /* x4 <- number of cache sets - 1 */
+       /* x5 <- bit position of way size */
+
+loop_set:
+       mov     x6, x3                  /* create working copy of way count */
+loop_way:
+       lsl     x7, x6, x5
+       orr     x9, x1, x7              /* level is bits [3:1]: use level << 1 */
+       lsl     x7, x4, x2
+       orr     x9, x9, x7              /* map set number to cisw value */
+       dc      cisw, x9                /* clean & invalidate by set/way */
+       subs    x6, x6, #1              /* decrement the way */
+       b.ge    loop_way                /* signed: runs for way == 0 too */
+       subs    x4, x4, #1              /* decrement the set */
+       b.ge    loop_set                /* signed: runs for set == 0 too */
+
+       ret
+ENDPROC(__asm_flush_dcache_level)
+
+/*
+ * void __asm_flush_dcache_all(void)
+ * Clean and invalidate all data/unified cache levels below LoC by SET/WAY.
+ * Clobbers x0-x7, x9-x11 and x15 (x15 keeps lr across the inner bl).
+ */
+ENTRY(__asm_flush_dcache_all)
+       dsb     sy
+       mov     x15, lr
+       mrs     x10, clidr_el1          /* read clidr */
+       lsr     x11, x10, #24
+       and     x11, x11, #0x7          /* x11 <- loc */
+       cbz     x11, finished           /* if loc is 0, no need to clean */
+       mov     x0, #0                  /* start flush at cache level 0 */
+       /* x0  <- cache level */
+       /* x10 <- clidr_el1 */
+       /* x11 <- loc */
+
+loop_level:
+       lsl     x1, x0, #1
+       add     x1, x1, x0              /* x1 <- 3x cache level (ctype shift) */
+       lsr     x1, x10, x1
+       and     x1, x1, #7              /* x1 <- cache type */
+       cmp     x1, #2
+       b.lt    skip                    /* skip if no cache or icache only */
+       bl      __asm_flush_dcache_level
+skip:
+       add     x0, x0, #1              /* increment cache level */
+       cmp     x11, x0
+       b.gt    loop_level
+
+finished:
+       mov     x0, #0
+       msr     csselr_el1, x0          /* switch back to cache level 0 */
+       dsb     sy
+       isb
+       mov     lr, x15
+       ret
+ENDPROC(__asm_flush_dcache_all)
+
+/*
+ * void __asm_flush_dcache_range(start, end)
+ *
+ * clean & invalidate data cache in the range [start, end) by address;
+ * start is rounded down to a line boundary, at least one line is flushed.
+ * x0: start address
+ * x1: end address (exclusive); x0, x2, x3 are clobbered
+ */
+ENTRY(__asm_flush_dcache_range)
+       mrs     x3, ctr_el0             /* read CTR */
+       lsr     x3, x3, #16
+       and     x3, x3, #0xf            /* cache line size encoding (log2 words) */
+       mov     x2, #4                  /* bytes per word */
+       lsl     x2, x2, x3              /* actual cache line size */
+
+       /* x2 <- minimal cache line size in cache system */
+       sub     x3, x2, #1
+       bic     x0, x0, x3              /* align start down to a cache line */
+1:      dc     civac, x0               /* clean & invalidate D/unified line */
+       add     x0, x0, x2
+       cmp     x0, x1
+       b.lo    1b                      /* unsigned compare: loop while x0 < end */
+       dsb     sy
+       ret
+ENDPROC(__asm_flush_dcache_range)
+
+/*
+ * void __asm_invalidate_icache_all(void)
+ *
+ * invalidate all icache entries (IC IALLUIS: inner shareable, to PoU).
+ */
+ENTRY(__asm_invalidate_icache_all)
+       ic      ialluis
+       isb     sy
+       ret
+ENDPROC(__asm_invalidate_icache_all)
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
new file mode 100644
index 0000000..efd7466
--- /dev/null
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -0,0 +1,218 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/system.h>
+#include <asm/armv8/mmu.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+
+static void set_pgtable_section(u64 section, u64 memory_type)
+{
+       u64 *page_table = (u64 *)gd->arch.tlb_addr;
+       u64 value;
+
+       value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF;
+       value |= PMD_ATTRINDX(memory_type);
+       page_table[section] = value;
+}
+
+/* to activate the MMU we need to set up virtual memory */
+static void mmu_setup(void)
+{
+       int i, j, el;
+       bd_t *bd = gd->bd;
+
+       /* Setup an identity-mapping for all spaces */
+       for (i = 0; i < (PGTABLE_SIZE >> 3); i++)
+               set_pgtable_section(i, MT_DEVICE_NGNRNE);
+
+       /* Setup an identity-mapping for all RAM space */
+       for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+               ulong start = bd->bi_dram[i].start;
+               ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size;
+               for (j = start >> SECTION_SHIFT;
+                    j < end >> SECTION_SHIFT; j++) {
+                       set_pgtable_section(j, MT_NORMAL);
+               }
+       }
+
+       /* load TTBR0 */
+       el = curent_el();
+       if (el == 1)
+               asm volatile("msr ttbr0_el1, %0"
+                            : : "r" (gd->arch.tlb_addr) : "memory");
+       else if (el == 2)
+               asm volatile("msr ttbr0_el2, %0"
+                            : : "r" (gd->arch.tlb_addr) : "memory");
+       else
+               panic("Not Supported Exception Level");
+
+       /* enable the mmu */
+       set_sctlr(get_sctlr() | CR_M);
+}
+
+/*
+ * Invalidate the entire D-cache at all levels (done as clean & invalidate)
+ */
+void invalidate_dcache_all(void)
+{
+       __asm_flush_dcache_all();
+}
+
+/*
+ * Performs a clean & invalidation of the entire data cache at all levels
+ */
+void flush_dcache_all(void)
+{
+       __asm_flush_dcache_all();
+}
+
+/*
+ * Invalidate [start, stop) in D-cache/unified cache (done as clean & invalidate)
+ */
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+       __asm_flush_dcache_range(start, stop);
+}
+
+/*
+ * Flush range(clean & invalidate) from all levels of D-cache/unified cache
+ */
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+       __asm_flush_dcache_range(start, stop);
+}
+
+void dcache_enable(void)
+{
+       /* The data cache is not active unless the mmu is enabled */
+       if (!(get_sctlr() & CR_M)) {
+               invalidate_dcache_all();
+               __asm_invalidate_tlb_all();
+               mmu_setup();
+       }
+
+       set_sctlr(get_sctlr() | CR_C);
+}
+
+void dcache_disable(void)
+{
+       uint32_t sctlr;
+
+       sctlr = get_sctlr();
+
+       /* if cache isn't enabled no need to disable */
+       if (!(sctlr & CR_C))
+               return;
+
+       set_sctlr(sctlr & ~(CR_C|CR_M));
+
+       flush_dcache_all();
+       __asm_invalidate_tlb_all();
+}
+
+int dcache_status(void)
+{
+       return (get_sctlr() & CR_C) != 0;
+}
+
+#else  /* CONFIG_SYS_DCACHE_OFF */
+
+void invalidate_dcache_all(void)
+{
+}
+
+void flush_dcache_all(void)
+{
+}
+
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+}
+
+void dcache_enable(void)
+{
+}
+
+void dcache_disable(void)
+{
+}
+
+int dcache_status(void)
+{
+       return 0;
+}
+
+#endif /* CONFIG_SYS_DCACHE_OFF */
+
+#ifndef CONFIG_SYS_ICACHE_OFF
+
+void icache_enable(void)
+{
+       set_sctlr(get_sctlr() | CR_I);
+}
+
+void icache_disable(void)
+{
+       set_sctlr(get_sctlr() & ~CR_I);
+}
+
+int icache_status(void)
+{
+       return (get_sctlr() & CR_I) != 0;
+}
+
+void invalidate_icache_all(void)
+{
+       __asm_invalidate_icache_all();
+}
+
+#else  /* CONFIG_SYS_ICACHE_OFF */
+
+void icache_enable(void)
+{
+}
+
+void icache_disable(void)
+{
+}
+
+int icache_status(void)
+{
+       return 0;
+}
+
+void invalidate_icache_all(void)
+{
+}
+
+#endif /* CONFIG_SYS_ICACHE_OFF */
+
+/*
+ * Enable dCache & iCache, whether cache is actually enabled
+ * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
+ */
+void enable_caches(void)
+{
+       icache_enable();
+       dcache_enable();
+}
+
+/*
+ * Flush range from all levels of d-cache/unified-cache
+ */
+void flush_cache(unsigned long start, unsigned long size)
+{
+       flush_dcache_range(start, start + size);
+}
diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk
new file mode 100644
index 0000000..027a68c
--- /dev/null
+++ b/arch/arm/cpu/armv8/config.mk
@@ -0,0 +1,15 @@
+#
+# (C) Copyright 2002
+# Gary Jennejohn, DENX Software Engineering, <ga...@denx.de>
+#
+# SPDX-License-Identifier:     GPL-2.0+
+#
+PLATFORM_RELFLAGS += -fno-common -ffixed-x18
+
+# SEE README.arm-unaligned-accesses
+PF_NO_UNALIGNED := $(call cc-option, -mstrict-align)
+PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED)
+
+PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a)
+PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8)
+PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED)
diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c
new file mode 100644
index 0000000..83e73ab
--- /dev/null
+++ b/arch/arm/cpu/armv8/cpu.c
@@ -0,0 +1,67 @@
+/*
+ * (C) Copyright 2008 Texas Instruments
+ *
+ * (C) Copyright 2002
+ * Sysgo Real-Time Solutions, GmbH <www.elinos.com>
+ * Marius Groeger <mgroe...@sysgo.de>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <ga...@denx.de>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+/*
+ * CPU specific code
+ */
+
+#include <common.h>
+#include <command.h>
+#include <asm/system.h>
+#include <linux/compiler.h>
+
+void __weak cpu_cache_initialization(void){}
+
+int cleanup_before_linux(void)
+{
+       /*
+        * this function is called just before we call linux
+        * it prepares the processor for linux
+        *
+        * we turn off caches etc ...
+        */
+#ifndef CONFIG_SPL_BUILD
+       disable_interrupts();
+#endif
+
+       /*
+        * Turn off I-cache and invalidate it
+        */
+       icache_disable();
+       invalidate_icache_all();
+
+       /*
+        * turn off D-cache
+        * dcache_disable() in turn flushes the d-cache and disables MMU
+        */
+       dcache_disable();
+
+       /*
+        * After D-cache is flushed and before it is disabled there may
+        * be some new valid entries brought into the cache. We are sure
+        * that these lines are not dirty and will not affect our execution.
+        * (because unwinding the call-stack and setting a bit in CP15 SCTRL
+        * is all we did during this. We have not pushed anything on to the
+        * stack. Neither have we affected any static data)
+        * So just invalidate the entire d-cache again to avoid coherency
+        * problems for kernel
+        */
+       invalidate_dcache_all();
+
+       /*
+        * Some CPU need more cache attention before starting the kernel.
+        */
+       cpu_cache_initialization();
+
+       return 0;
+}
diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
new file mode 100644
index 0000000..b00d4b3
--- /dev/null
+++ b/arch/arm/cpu/armv8/exceptions.S
@@ -0,0 +1,112 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <asm/ptrace.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * Enter Exception.
+ * This will save the processor state that is ELR/X0~X30
+ * to the stack frame.
+ */
+.macro exception_entry
+       stp     x29, x30, [sp, #-16]!
+       stp     x27, x28, [sp, #-16]!
+       stp     x25, x26, [sp, #-16]!
+       stp     x23, x24, [sp, #-16]!
+       stp     x21, x22, [sp, #-16]!
+       stp     x19, x20, [sp, #-16]!
+       stp     x17, x18, [sp, #-16]!
+       stp     x15, x16, [sp, #-16]!
+       stp     x13, x14, [sp, #-16]!
+       stp     x11, x12, [sp, #-16]!
+       stp     x9, x10, [sp, #-16]!
+       stp     x7, x8, [sp, #-16]!
+       stp     x5, x6, [sp, #-16]!
+       stp     x3, x4, [sp, #-16]!
+       stp     x1, x2, [sp, #-16]!
+
+       /* Could be running at EL1 or EL2 */
+       mrs     x11, CurrentEL
+       cmp     x11, 0x8
+       b.eq    1f
+       mrs     x1, esr_el1
+       mrs     x2, elr_el1
+       b       2f
+1:     mrs     x1, esr_el2
+       mrs     x2, elr_el2
+2:
+       stp     x2, x0, [sp, #-16]!
+       mov     x0, sp
+.endm
+
+/*
+ * Exception vectors.
+ */
+       .align  11
+       .globl  vectors
+vectors:
+       .align  7
+       b       _do_bad_sync    /* Current EL Synchronous Thread */
+
+       .align  7
+       b       _do_bad_irq     /* Current EL IRQ Thread */
+
+       .align  7
+       b       _do_bad_fiq     /* Current EL FIQ Thread */
+
+       .align  7
+       b       _do_bad_error   /* Current EL Error Thread */
+
+       .align  7
+       b       _do_sync        /* Current EL Synchronous Handler */
+
+       .align  7
+       b       _do_irq         /* Current EL IRQ Handler */
+
+       .align  7
+       b       _do_fiq         /* Current EL FIQ Handler */
+
+       .align  7
+       b       _do_error       /* Current EL Error Handler */
+
+
+_do_bad_sync:
+       exception_entry
+       bl      do_bad_sync
+
+_do_bad_irq:
+       exception_entry
+       bl      do_bad_irq
+
+_do_bad_fiq:
+       exception_entry
+       bl      do_bad_fiq
+
+_do_bad_error:
+       exception_entry
+       bl      do_bad_error
+
+_do_sync:
+       exception_entry
+       bl      do_sync
+
+_do_irq:
+       exception_entry
+       bl      do_irq
+
+_do_fiq:
+       exception_entry
+       bl      do_fiq
+
+_do_error:
+       exception_entry
+       bl      do_error
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
new file mode 100644
index 0000000..7352778
--- /dev/null
+++ b/arch/arm/cpu/armv8/start.S
@@ -0,0 +1,234 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+#include <asm/armv8/mmu.h>
+
+/*************************************************************************
+ *
+ * Startup Code (reset vector)
+ *
+ *************************************************************************/
+
+.globl _start
+_start:
+       b       reset
+
+       .align 3
+
+.globl _TEXT_BASE
+_TEXT_BASE:
+       .quad   CONFIG_SYS_TEXT_BASE
+
+/*
+ * These are defined in the linker script.
+ */
+.globl _end_ofs
+_end_ofs:
+       .quad   _end - _start
+
+.globl _bss_start_ofs
+_bss_start_ofs:
+       .quad   __bss_start - _start
+
+.globl _bss_end_ofs
+_bss_end_ofs:
+       .quad   __bss_end - _start
+
+reset:
+       /*
+        * Could be EL3/EL2/EL1
+        */
+       mrs     x0, CurrentEL
+       cmp     x0, #0xc
+       b.ne    reset_nonsecure                 /* Not EL3 */
+
+       bl      setup_el3                       /* EL3 initialization */
+
+       /*
+        * MMU Disabled, iCache Disabled, dCache Disabled
+        */
+reset_nonsecure:
+
+#ifdef CONFIG_BOOTING_EL1
+       branch_if_el2 x0, 1f
+       b       2f
+1:     bl      setup_el2                       /* EL2 initialization */
+2:
+#endif
+
+       /* Initialize vBAR/CPACR_EL1/MDSCR_EL1 */
+       adr     x0, vectors
+       branch_if_el2 x1, 1f
+       msr     vbar_el1, x0
+       mov     x0, #3 << 20
+       msr     cpacr_el1, x0                   /* Enable FP/SIMD */
+       msr     mdscr_el1, xzr
+       b       2f
+1:     msr     vbar_el2, x0
+2:
+
+       /* Cache/BPB/TLB Invalidate */
+       bl      __asm_flush_dcache_all          /* dCache clean & invalidate */
+       bl      __asm_invalidate_icache_all     /* iCache invalidate */
+       bl      __asm_invalidate_tlb_all        /* invalidate TLBs */
+
+       /* Processor specific initialization */
+       bl      lowlevel_init
+
+       branch_if_slave x0, slave_cpu
+
+       /*
+        * Master CPU
+        */
+master_cpu:
+       bl      _main
+
+       /*
+        * Slave CPUs
+        */
+slave_cpu:
+       wfe
+       ldr     x1, =CPU_RELEASE_ADDR
+       ldr     x0, [x1]
+       cbz     x0, slave_cpu
+       br      x0                      /* branch to the given address */
+
+/*-------------------------------------------------------------------------*/
+
+WEAK(setup_el3)
+       mov     x0, #0x531      /* Non-secure EL0/EL1 | HVC | 64bit EL2 */
+       msr     scr_el3, x0
+       msr     cptr_el3, xzr   /* Disable coprocessor traps to EL3 */
+
+       /* GIC initialization */
+       branch_if_slave x0, 2f
+
+       /* Master initialize distributor */
+       ldr     x1, =GICD_BASE          /* GICD_CTLR */
+       mov     w0, #0x3                /* Enable Group0 & Group1 */
+       str     w0, [x1]
+       ldr     w0, [x1, #0x4]          /* GICD_TYPER */
+       and     w2, w0, #0x1f           /* ITLinesNumber */
+       add     w2, w2, #0x1            /* Number of GICD_IGROUPR registers */
+       add     x1, x1, #0x80           /* GICD_IGROUPR */
+       mov     w0, #~0                 /* All Group1 */
+1:     str     w0, [x1], #0x4
+       sub     w2, w2, #0x1
+       cbnz    w2, 1b
+       b       3f
+
+       /* Slave initialize distributor */
+2:     ldr     x1, =GICD_BASE          /* GICD_CTLR */
+       mov     w0, #~0                 /* All Group1 */
+       str     w0, [x1, #0x80]
+
+       /* Initialize cpu interface */
+3:     ldr     x1, =GICC_BASE          /* GICC_CTLR */
+       mov     w0, #0x3                /* Enable Group0 & Group1 */
+       str     w0, [x1]
+
+       mov     w0, #0x1 << 7           /* Non-Secure access to GICC_PMR */
+       str     w0, [x1, #0x4]          /* GICC_PMR */
+
+       /* Counter frequency initialization */
+       ldr     x0, =CONFIG_SYS_CNTFRQ
+       msr     cntfrq_el0, x0
+
+       /* SCTLR_EL2 initialization */
+       msr     sctlr_el2, xzr
+
+       /* Return to the EL2_SP2 mode from EL3 */
+       mov     x0, #0x3c9              /* EL2_SP2 | D | A | I | F */
+       msr     elr_el3, lr
+       msr     spsr_el3, x0
+       eret
+ENDPROC(setup_el3)
+
+WEAK(setup_el2)
+       /* Initialize Generic Timers; x0/x1 are clobbered, lr is the EL1 entry */
+       mrs     x0, cnthctl_el2
+       orr     x0, x0, #0x3            /* Enable EL1 access to timers */
+       msr     cnthctl_el2, x0
+       msr     cntvoff_el2, xzr        /* Clear virtual offset (must be zero) */
+       mrs     x0, cntkctl_el1
+       orr     x0, x0, #0x3            /* EL0 access to counters */
+       msr     cntkctl_el1, x0
+
+       /* Initialize virtual MIDR/MPIDR registers from the real ones */
+       mrs     x0, midr_el1
+       mrs     x1, mpidr_el1
+       msr     vpidr_el2, x0
+       msr     vmpidr_el2, x1
+
+       /* Disable coprocessor traps */
+       mov     x0, #0x33ff
+       msr     cptr_el2, x0            /* Disable coprocessor traps to EL2 */
+       msr     hstr_el2, xzr           /* Disable CP15 traps to EL2 */
+
+       /* Initialize HCR_EL2 */
+       mov     x0, #(1 << 31)          /* 64bit EL1 */
+       orr     x0, x0, #(1 << 29)      /* Disable HVC */
+       msr     hcr_el2, x0
+
+       /* SCTLR_EL1 initialization: 0x30d00800 */
+       mov     x0, #0x0800
+       movk    x0, #0x30d0, lsl #16
+       msr     sctlr_el1, x0
+
+       /* Return to the EL1_SP1 mode from EL2 */
+       mov     x0, #0x3c5              /* EL1_SP1 | D | A | I | F */
+       msr     elr_el2, lr             /* eret resumes at the caller's return address */
+       msr     spsr_el2, x0
+       eret
+ENDPROC(setup_el2)
+
+WEAK(lowlevel_init)
+       ret
+ENDPROC(lowlevel_init)
+
+/*-------------------------------------------------------------------------*/
+
+ENTRY(c_runtime_cpu_setup)
+       /* If I-cache is enabled invalidate it */
+#ifndef CONFIG_SYS_ICACHE_OFF
+       ic      iallu                   /* I+BTB cache invalidate */
+       isb     sy
+#endif
+
+#ifndef CONFIG_SYS_DCACHE_OFF
+       /*
+        * Setup MAIR and TCR.
+        */
+       ldr     x0, =MEMORY_ATTRIBUTES
+       ldr     x1, =TCR_FLAGS
+
+       branch_if_el2 x2, 1f
+       orr     x1, x1, TCR_EL1_IPS_BITS
+       msr     mair_el1, x0
+       msr     tcr_el1, x1
+       b       2f
+1:     orr     x1, x1, TCR_EL2_IPS_BITS
+       msr     mair_el2, x0
+       msr     tcr_el2, x1
+2:
+#endif
+
+       /* Relocate vBAR */
+       adr     x0, vectors
+       branch_if_el2 x1, 1f
+       msr     vbar_el1, x0
+       b       2f
+1:     msr     vbar_el2, x0
+2:
+
+       ret
+ENDPROC(c_runtime_cpu_setup)
diff --git a/arch/arm/cpu/armv8/timer.c b/arch/arm/cpu/armv8/timer.c
new file mode 100644
index 0000000..9605e84
--- /dev/null
+++ b/arch/arm/cpu/armv8/timer.c
@@ -0,0 +1,80 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <common.h>
+#include <div64.h>
+#include <linux/compiler.h>
+
+/*
+ * Generic Timer implementation of __udelay/get_timer/get_ticks/get_tbclk
+ * functions. If any other timer is used, another implementation should be
+ * placed in platform code.
+ */
+
+static inline u64 get_cntfrq(void)
+{
+       u64 cntfrq;
+       asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq));
+       return cntfrq;
+}
+
+static inline u64 tick_to_time(u64 tick)
+{
+       tick *= CONFIG_SYS_HZ;
+       do_div(tick, get_cntfrq());
+       return tick;
+}
+
+static inline u64 time_to_tick(u64 time)
+{
+       time *= get_cntfrq();
+       do_div(time, CONFIG_SYS_HZ);
+       return time;
+}
+
+/*
+ * Generic timer implementation of get_tbclk()
+ */
+ulong __weak get_tbclk(void)
+{
+       return CONFIG_SYS_HZ;
+}
+
+/*
+ * Generic timer implementation of get_timer()
+ */
+ulong __weak get_timer(ulong base)
+{
+       u64 cval;
+
+       isb();
+       asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
+
+       return tick_to_time(cval) - base;
+}
+
+/*
+ * Generic timer implementation of get_ticks()
+ */
+unsigned long long __weak get_ticks(void)
+{
+       return get_timer(0);
+}
+
+/*
+ * Generic timer __udelay(): spin on CNTPCT_EL0 so sub-ms delays are honoured
+ */
+void __weak __udelay(ulong usec)
+{
+       u64 start, now, ticks = (u64)usec * get_cntfrq();
+
+       do_div(ticks, 1000000);         /* usec -> counter ticks (rounds down) */
+       asm volatile("isb; mrs %0, cntpct_el0" : "=r" (start));
+       do {
+               asm volatile("isb; mrs %0, cntpct_el0" : "=r" (now));
+       } while (now - start <= ticks); /* '<=' compensates the round-down */
+}
diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S
new file mode 100644
index 0000000..4dd3a65
--- /dev/null
+++ b/arch/arm/cpu/armv8/tlb.S
@@ -0,0 +1,30 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <version.h>
+#include <linux/linkage.h>
+#include <asm/macro.h>
+
+/*
+ * void __asm_invalidate_tlb_all(void)
+ * Invalidate all TLB entries: TLBI ALLE2 on the branch_if_el2 path, TLBI
+ * VMALLE1 otherwise. x9 is handed to the macro (presumably as scratch).
+ */
+ENTRY(__asm_invalidate_tlb_all)
+       branch_if_el2 x9, 1f
+       tlbi    vmalle1
+       dsb     sy
+       isb
+       b       2f
+1:     tlbi    alle2
+       dsb     sy
+       isb
+2:
+       ret
+ENDPROC(__asm_invalidate_tlb_all)
diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
new file mode 100644
index 0000000..4c12222
--- /dev/null
+++ b/arch/arm/cpu/armv8/u-boot.lds
@@ -0,0 +1,89 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * (C) Copyright 2002
+ * Gary Jennejohn, DENX Software Engineering, <ga...@denx.de>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
+OUTPUT_ARCH(aarch64)
+ENTRY(_start)
+SECTIONS
+{
+       . = 0x00000000;
+
+       . = ALIGN(8);
+       .text :
+       {
+               *(.__image_copy_start)
+               CPUDIR/start.o (.text*)
+               *(.text*)
+       }
+
+       . = ALIGN(8);
+       .rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) }
+
+       . = ALIGN(8);
+       .data : {
+               *(.data*)
+       }
+
+       . = ALIGN(8);
+
+       . = .;
+
+       . = ALIGN(8);
+       .u_boot_list : {
+               KEEP(*(SORT(.u_boot_list*)));
+       }
+
+       . = ALIGN(8);
+
+       .image_copy_end :
+       {
+               *(.__image_copy_end)
+       }
+
+       . = ALIGN(8);
+
+       .rel_dyn_start :
+       {
+               *(.__rel_dyn_start)
+       }
+
+       .rela.dyn : {
+               *(.rela*)
+       }
+
+       .rel_dyn_end :
+       {
+               *(.__rel_dyn_end)
+       }
+
+       _end = .;
+
+       . = ALIGN(8);
+
+       .bss_start : {
+               KEEP(*(.__bss_start));
+       }
+
+       .bss : {
+               *(.bss*)
+                . = ALIGN(8);
+       }
+
+       .bss_end : {
+               KEEP(*(.__bss_end));
+       }
+
+       /DISCARD/ : { *(.dynsym) }
+       /DISCARD/ : { *(.dynstr*) }
+       /DISCARD/ : { *(.dynamic*) }
+       /DISCARD/ : { *(.plt*) }
+       /DISCARD/ : { *(.interp*) }
+       /DISCARD/ : { *(.gnu*) }
+}
diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h
new file mode 100644
index 0000000..c131751
--- /dev/null
+++ b/arch/arm/include/asm/armv8/mmu.h
@@ -0,0 +1,110 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#ifndef _ASM_ARMV8_MMU_H_
+#define _ASM_ARMV8_MMU_H_
+
+#ifdef __ASSEMBLY__
+#define _AC(X, Y)      X
+#else
+#define _AC(X, Y)      (X##Y)
+#endif
+
+#define UL(x)          _AC(x, UL)
+
+/***************************************************************/
+/*
+ * The following definitions depend on each other and must be
+ * recalculated together if any one of them changes.
+ */
+#define VA_BITS                        (42)    /* 42 bits virtual address */
+
+/* PAGE_SHIFT determines the page size */
+#undef  PAGE_SIZE
+#define PAGE_SHIFT             16
+#define PAGE_SIZE              (1 << PAGE_SHIFT)
+#define PAGE_MASK              (~(PAGE_SIZE-1))
+
+/*
+ * section address mask and size definitions.
+ */
+#define SECTION_SHIFT          29
+#define SECTION_SIZE           (UL(1) << SECTION_SHIFT)
+#define SECTION_MASK           (~(SECTION_SIZE-1))
+/***************************************************************/
+
+/*
+ * Memory types
+ */
+#define MT_DEVICE_NGNRNE       0
+#define MT_DEVICE_NGNRE                1
+#define MT_DEVICE_GRE          2
+#define MT_NORMAL_NC           3
+#define MT_NORMAL              4
+
+#define MEMORY_ATTRIBUTES      ((0x00 << (MT_DEVICE_NGNRNE*8)) |       \
+                               (0x04 << (MT_DEVICE_NGNRE*8)) |         \
+                               (0x0c << (MT_DEVICE_GRE*8)) |           \
+                               (0x44 << (MT_NORMAL_NC*8)) |            \
+                               (UL(0xff) << (MT_NORMAL*8)))
+
+/*
+ * Hardware page table definitions.
+ *
+ * Level 2 descriptor (PMD).
+ */
+#define PMD_TYPE_MASK          (3 << 0)
+#define PMD_TYPE_FAULT         (0 << 0)
+#define PMD_TYPE_TABLE         (3 << 0)
+#define PMD_TYPE_SECT          (1 << 0)
+
+/*
+ * Section
+ */
+#define PMD_SECT_S             (3 << 8)
+#define PMD_SECT_AF            (1 << 10)
+#define PMD_SECT_NG            (1 << 11)
+#define PMD_SECT_PXN           (UL(1) << 53)
+#define PMD_SECT_UXN           (UL(1) << 54)
+
+/*
+ * AttrIndx[2:0]
+ */
+#define PMD_ATTRINDX(t)                ((t) << 2)
+#define PMD_ATTRINDX_MASK      (7 << 2)
+
+/*
+ * TCR flags.
+ */
+#define TCR_T0SZ(x)            ((64 - (x)) << 0)
+#define TCR_IRGN_NC            (0 << 8)
+#define TCR_IRGN_WBWA          (1 << 8)
+#define TCR_IRGN_WT            (2 << 8)
+#define TCR_IRGN_WBNWA         (3 << 8)
+#define TCR_IRGN_MASK          (3 << 8)
+#define TCR_ORGN_NC            (0 << 10)
+#define TCR_ORGN_WBWA          (1 << 10)
+#define TCR_ORGN_WT            (2 << 10)
+#define TCR_ORGN_WBNWA         (3 << 10)
+#define TCR_ORGN_MASK          (3 << 10)
+#define TCR_SHARED_NON         (0 << 12)
+#define TCR_SHARED_OUTER       (1 << 12)
+#define TCR_SHARED_INNER       (2 << 12)
+#define TCR_TG0_4K             (0 << 14)
+#define TCR_TG0_64K            (1 << 14)
+#define TCR_TG0_16K            (2 << 14)
+#define TCR_EL1_IPS_BITS       (UL(3) << 32)   /* 42 bits physical address */
+#define TCR_EL2_IPS_BITS       (3 << 16)       /* 42 bits physical address */
+
+/* PTWs cacheable, inner/outer WBWA and non-shareable */
+#define TCR_FLAGS              (TCR_TG0_64K |          \
+                               TCR_SHARED_NON |        \
+                               TCR_ORGN_WBWA |         \
+                               TCR_IRGN_WBWA |         \
+                               TCR_T0SZ(VA_BITS))
+
+#endif /* _ASM_ARMV8_MMU_H_ */
diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h
index c3489f1..71a9966 100644
--- a/arch/arm/include/asm/byteorder.h
+++ b/arch/arm/include/asm/byteorder.h
@@ -23,10 +23,22 @@
 #  define __SWAB_64_THRU_32__
 #endif
 
+#ifdef CONFIG_ARM64
+
+#ifdef __AARCH64EB__
+#include <linux/byteorder/big_endian.h>
+#else
+#include <linux/byteorder/little_endian.h>
+#endif
+
+#else  /* CONFIG_ARM64 */
+
 #ifdef __ARMEB__
 #include <linux/byteorder/big_endian.h>
 #else
 #include <linux/byteorder/little_endian.h>
 #endif
 
+#endif /* CONFIG_ARM64 */
+
 #endif
diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index 6d60a4a..ddebbc8 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -11,6 +11,8 @@
 
 #include <asm/system.h>
 
+#ifndef CONFIG_ARM64
+
 /*
  * Invalidate L2 Cache using co-proc instruction
  */
@@ -28,6 +30,9 @@ void l2_cache_disable(void);
 void set_section_dcache(int section, enum dcache_option option);
 
 void dram_bank_mmu_setup(int bank);
+
+#endif
+
 /*
  * The current upper bound for ARM L1 data cache line sizes is 64 bytes.  We
  * use that value for aligning DMA buffers unless the board config has 
specified
diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h
index 99b703e..abf79e5 100644
--- a/arch/arm/include/asm/config.h
+++ b/arch/arm/include/asm/config.h
@@ -9,4 +9,10 @@
 
 #define CONFIG_LMB
 #define CONFIG_SYS_BOOT_RAMDISK_HIGH
+
+#ifdef CONFIG_ARM64
+#define CONFIG_PHYS_64BIT
+#define CONFIG_STATIC_RELA
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/global_data.h 
b/arch/arm/include/asm/global_data.h
index e126436..60e8726 100644
--- a/arch/arm/include/asm/global_data.h
+++ b/arch/arm/include/asm/global_data.h
@@ -47,6 +47,10 @@ struct arch_global_data {
 
 #include <asm-generic/global_data.h>
 
-#define DECLARE_GLOBAL_DATA_PTR     register volatile gd_t *gd asm ("r9")
+#ifdef CONFIG_ARM64
+#define DECLARE_GLOBAL_DATA_PTR                register volatile gd_t *gd asm 
("x18")
+#else
+#define DECLARE_GLOBAL_DATA_PTR                register volatile gd_t *gd asm 
("r9")
+#endif
 
 #endif /* __ASM_GBL_DATA_H */
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 1fbc531..6a1f05a 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -75,42 +75,45 @@ static inline phys_addr_t virt_to_phys(void * vaddr)
 #define __arch_putw(v,a)               (*(volatile unsigned short *)(a) = (v))
 #define __arch_putl(v,a)               (*(volatile unsigned int *)(a) = (v))
 
-extern inline void __raw_writesb(unsigned int addr, const void *data, int 
bytelen)
+extern inline void __raw_writesb(unsigned long addr, const void *data,
+                                int bytelen)
 {
        uint8_t *buf = (uint8_t *)data;
        while(bytelen--)
                __arch_putb(*buf++, addr);
 }
 
-extern inline void __raw_writesw(unsigned int addr, const void *data, int 
wordlen)
+extern inline void __raw_writesw(unsigned long addr, const void *data,
+                                int wordlen)
 {
        uint16_t *buf = (uint16_t *)data;
        while(wordlen--)
                __arch_putw(*buf++, addr);
 }
 
-extern inline void __raw_writesl(unsigned int addr, const void *data, int 
longlen)
+extern inline void __raw_writesl(unsigned long addr, const void *data,
+                                int longlen)
 {
        uint32_t *buf = (uint32_t *)data;
        while(longlen--)
                __arch_putl(*buf++, addr);
 }
 
-extern inline void __raw_readsb(unsigned int addr, void *data, int bytelen)
+extern inline void __raw_readsb(unsigned long addr, void *data, int bytelen)
 {
        uint8_t *buf = (uint8_t *)data;
        while(bytelen--)
                *buf++ = __arch_getb(addr);
 }
 
-extern inline void __raw_readsw(unsigned int addr, void *data, int wordlen)
+extern inline void __raw_readsw(unsigned long addr, void *data, int wordlen)
 {
        uint16_t *buf = (uint16_t *)data;
        while(wordlen--)
                *buf++ = __arch_getw(addr);
 }
 
-extern inline void __raw_readsl(unsigned int addr, void *data, int longlen)
+extern inline void __raw_readsl(unsigned long addr, void *data, int longlen)
 {
        uint32_t *buf = (uint32_t *)data;
        while(longlen--)
diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h
index ff13f36..0977830 100644
--- a/arch/arm/include/asm/macro.h
+++ b/arch/arm/include/asm/macro.h
@@ -54,5 +54,41 @@
        bcs     1b
 .endm
 
+#ifdef CONFIG_ARM64
+/*
+ * Register aliases.
+ */
+lr     .req    x30
+
+/*
+ * Branch according to exception level
+ */
+.macro branch_if_el2, xreg, el2_label
+       mrs     \xreg, CurrentEL
+       cmp     \xreg, 0x8
+       b.eq    \el2_label
+.endm
+
+/*
+ * Branch if current processor is a slave,
+ * choose processor with all zero affinity value as the master.
+ */
+.macro branch_if_slave, xreg, slave_label
+       mrs     \xreg, mpidr_el1
+       tst     \xreg, #0xff            /* Test Affinity 0 */
+       b.ne    \slave_label
+       lsr     \xreg, \xreg, #8
+       tst     \xreg, #0xff            /* Test Affinity 1 */
+       b.ne    \slave_label
+       lsr     \xreg, \xreg, #8
+       tst     \xreg, #0xff            /* Test Affinity 2 */
+       b.ne    \slave_label
+       lsr     \xreg, \xreg, #16
+       tst     \xreg, #0xff            /* Test Affinity 3 */
+       b.ne    \slave_label
+.endm
+
+#endif /* CONFIG_ARM64 */
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_ARM_MACRO_H__ */
diff --git a/arch/arm/include/asm/posix_types.h 
b/arch/arm/include/asm/posix_types.h
index c412486..9ba9add 100644
--- a/arch/arm/include/asm/posix_types.h
+++ b/arch/arm/include/asm/posix_types.h
@@ -13,6 +13,8 @@
 #ifndef __ARCH_ARM_POSIX_TYPES_H
 #define __ARCH_ARM_POSIX_TYPES_H
 
+#include <config.h>
+
 /*
  * This file is generally used by user-level software, so you need to
  * be a little careful about namespace pollution etc.  Also, we cannot
@@ -28,9 +30,17 @@ typedef int                  __kernel_pid_t;
 typedef unsigned short         __kernel_ipc_pid_t;
 typedef unsigned short         __kernel_uid_t;
 typedef unsigned short         __kernel_gid_t;
+
+#ifdef CONFIG_ARM64
+typedef unsigned long          __kernel_size_t;
+typedef long                   __kernel_ssize_t;
+typedef long                   __kernel_ptrdiff_t;
+#else  /* CONFIG_ARM64 */
 typedef unsigned int           __kernel_size_t;
 typedef int                    __kernel_ssize_t;
 typedef int                    __kernel_ptrdiff_t;
+#endif /* CONFIG_ARM64 */
+
 typedef long                   __kernel_time_t;
 typedef long                   __kernel_suseconds_t;
 typedef long                   __kernel_clock_t;
diff --git a/arch/arm/include/asm/proc-armv/ptrace.h 
b/arch/arm/include/asm/proc-armv/ptrace.h
index 79cc644..fd280cb 100644
--- a/arch/arm/include/asm/proc-armv/ptrace.h
+++ b/arch/arm/include/asm/proc-armv/ptrace.h
@@ -12,6 +12,25 @@
 
 #include <linux/config.h>
 
+#ifdef CONFIG_ARM64
+
+#define PCMASK         0
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This struct defines the way the registers are stored
+ * on the stack during an exception.
+ */
+struct pt_regs {
+       unsigned long elr;
+       unsigned long regs[31];
+};
+
+#endif /* __ASSEMBLY__ */
+
+#else  /* CONFIG_ARM64 */
+
 #define USR26_MODE     0x00
 #define FIQ26_MODE     0x01
 #define IRQ26_MODE     0x02
@@ -106,4 +125,6 @@ static inline int valid_user_regs(struct pt_regs *regs)
 
 #endif /* __ASSEMBLY__ */
 
+#endif /* CONFIG_ARM64 */
+
 #endif
diff --git a/arch/arm/include/asm/proc-armv/system.h 
b/arch/arm/include/asm/proc-armv/system.h
index b4cfa68..19b2b44 100644
--- a/arch/arm/include/asm/proc-armv/system.h
+++ b/arch/arm/include/asm/proc-armv/system.h
@@ -15,6 +15,60 @@
 /*
  * Save the current interrupt enable state & disable IRQs
  */
+#ifdef CONFIG_ARM64
+
+/*
+ * Save DAIF into flags, then mask IRQs/FIQs (daifset #3 sets I and F).
+ * The "\n" keeps the two instructions on separate assembler lines.
+ */
+#define local_irq_save(flags)                                  \
+       ({                                                      \
+       asm volatile(                                           \
+       "mrs    %0, daif\n"                                     \
+       "msr    daifset, #3"                                    \
+       : "=r" (flags)                                          \
+       :                                                       \
+       : "memory");                                            \
+       })
+
+/*
+ * restore saved IRQ & FIQ state
+ */
+#define local_irq_restore(flags)                               \
+       ({                                                      \
+       asm volatile(                                           \
+       "msr    daif, %0"                                       \
+       :                                                       \
+       : "r" (flags)                                           \
+       : "memory");                                            \
+       })
+
+/*
+ * Enable IRQs/FIQs
+ */
+#define local_irq_enable()                                     \
+       ({                                                      \
+       asm volatile(                                           \
+       "msr    daifclr, #3"                                    \
+       :                                                       \
+       :                                                       \
+       : "memory");                                            \
+       })
+
+/*
+ * Disable IRQs/FIQs
+ */
+#define local_irq_disable()                                    \
+       ({                                                      \
+       asm volatile(                                           \
+       "msr    daifset, #3"                                    \
+       :                                                       \
+       :                                                       \
+       : "memory");                                            \
+       })
+
+#else  /* CONFIG_ARM64 */
+
 #define local_irq_save(x)                                      \
        ({                                                      \
                unsigned long temp;                             \
@@ -109,7 +163,10 @@
        : "r" (x)                                               \
        : "memory")
 
-#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110)
+#endif /* CONFIG_ARM64 */
+
+#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) || \
+       defined(CONFIG_ARM64)
 /*
  * On the StrongARM, "swp" is terminally broken since it bypasses the
  * cache totally.  This means that the cache becomes inconsistent, and,
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 760345f..0e7c455 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -1,6 +1,79 @@
 #ifndef __ASM_ARM_SYSTEM_H
 #define __ASM_ARM_SYSTEM_H
 
+#ifdef CONFIG_ARM64
+
+/*
+ * SCTLR_EL1/SCTLR_EL2 bit definitions
+ */
+#define CR_M           (1 << 0)        /* MMU enable                   */
+#define CR_A           (1 << 1)        /* Alignment abort enable       */
+#define CR_C           (1 << 2)        /* Dcache enable                */
+#define CR_SA          (1 << 3)        /* Stack Alignment Check Enable */
+#define CR_I           (1 << 12)       /* Icache enable                */
+#define CR_WXN         (1 << 19)       /* Write Permission Implies XN  */
+#define CR_EE          (1 << 25)       /* Exception (Big) Endian       */
+
+#define PGTABLE_SIZE   (0x10000)
+
+#ifndef __ASSEMBLY__
+
+#define isb()                          \
+       ({asm volatile(                 \
+       "isb" : : : "memory");          \
+       })
+
+#define wfi()                          \
+       ({asm volatile(                 \
+       "wfi" : : : "memory");          \
+       })
+
+static inline unsigned int curent_el(void)
+{
+       unsigned int el;
+       asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc");
+       return el >> 2;
+}
+
+static inline unsigned int get_sctlr(void)
+{
+       unsigned int el, val;
+
+       el = curent_el();
+       if (el == 1)
+               asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc");
+       else if (el == 2)
+               asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc");
+       else
+               panic("Not Supported Exception Level");
+
+       return val;
+}
+
+static inline void set_sctlr(unsigned int val)
+{
+       unsigned int el;
+
+       el = curent_el();
+       if (el == 1)
+               asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc");
+       else if (el == 2)
+               asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc");
+       else
+               panic("Not Supported Exception Level");
+
+       asm volatile("isb");
+}
+
+void __asm_flush_dcache_all(void);
+void __asm_flush_dcache_range(u64 start, u64 end);
+void __asm_invalidate_tlb_all(void);
+void __asm_invalidate_icache_all(void);
+
+#endif /* __ASSEMBLY__ */
+
+#else /* CONFIG_ARM64 */
+
 #ifdef __KERNEL__
 
 #define CPU_ARCH_UNKNOWN       0
@@ -45,6 +118,8 @@
 #define CR_AFE (1 << 29)       /* Access flag enable                   */
 #define CR_TE  (1 << 30)       /* Thumb exception enable               */
 
+#define PGTABLE_SIZE           (4096 * 4)
+
 /*
  * This is used to ensure the compiler did actually allocate the register we
  * asked it for some inline assembly sequences.  Apparently we can't trust
@@ -132,4 +207,6 @@ void mmu_page_table_flush(unsigned long start, unsigned 
long stop);
 
 #endif /* __KERNEL__ */
 
+#endif /* CONFIG_ARM64 */
+
 #endif
diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h
index 71dc049..2326420 100644
--- a/arch/arm/include/asm/types.h
+++ b/arch/arm/include/asm/types.h
@@ -39,7 +39,11 @@ typedef unsigned int u32;
 typedef signed long long s64;
 typedef unsigned long long u64;
 
+#ifdef CONFIG_ARM64
+#define BITS_PER_LONG 64
+#else  /* CONFIG_ARM64 */
 #define BITS_PER_LONG 32
+#endif /* CONFIG_ARM64 */
 
 /* Dma addresses are 32-bits wide.  */
 
diff --git a/arch/arm/include/asm/u-boot.h b/arch/arm/include/asm/u-boot.h
index 2b5fce8..cb81232 100644
--- a/arch/arm/include/asm/u-boot.h
+++ b/arch/arm/include/asm/u-boot.h
@@ -44,6 +44,10 @@ typedef struct bd_info {
 #endif /* !CONFIG_SYS_GENERIC_BOARD */
 
 /* For image.h:image_check_target_arch() */
+#ifndef CONFIG_ARM64
 #define IH_ARCH_DEFAULT IH_ARCH_ARM
+#else
+#define IH_ARCH_DEFAULT IH_ARCH_ARM64
+#endif
 
 #endif /* _U_BOOT_H_ */
diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
index 44593a8..0a228fb 100644
--- a/arch/arm/include/asm/unaligned.h
+++ b/arch/arm/include/asm/unaligned.h
@@ -8,7 +8,7 @@
 /*
  * Select endianness
  */
-#ifndef __ARMEB__
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 #define get_unaligned  __get_unaligned_le
 #define put_unaligned  __put_unaligned_le
 #else
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 4e78723..03c31c7 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -10,6 +10,9 @@ include $(TOPDIR)/config.mk
 LIB    = $(obj)lib$(ARCH).o
 LIBGCC = $(obj)libgcc.o
 
+ifdef CONFIG_ARM64
+SOBJS-y += crt0_64.o
+else
 GLSOBJS        += _ashldi3.o
 GLSOBJS        += _ashrdi3.o
 GLSOBJS        += _divsi3.o
@@ -21,9 +24,14 @@ GLSOBJS      += _umodsi3.o
 GLCOBJS        += div0.o
 
 SOBJS-y += crt0.o
+endif
 
 ifndef CONFIG_SPL_BUILD
+ifdef CONFIG_ARM64
+SOBJS-y += relocate_64.o
+else
 SOBJS-y += relocate.o
+endif
 ifndef CONFIG_SYS_GENERIC_BOARD
 COBJS-y        += board.o
 endif
@@ -38,11 +46,17 @@ else
 COBJS-$(CONFIG_SPL_FRAMEWORK) += spl.o
 endif
 
+ifdef CONFIG_ARM64
+COBJS-y        += interrupts_64.o
+else
 COBJS-y        += interrupts.o
+endif
 COBJS-y        += reset.o
 
 COBJS-y        += cache.o
+ifndef CONFIG_ARM64
 COBJS-y        += cache-cp15.o
+endif
 
 SRCS   := $(GLSOBJS:.o=.S) $(GLCOBJS:.o=.c) \
           $(SOBJS-y:.o=.S) $(COBJS-y:.o=.c)
diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c
index 34f50b0..c4904b4 100644
--- a/arch/arm/lib/board.c
+++ b/arch/arm/lib/board.c
@@ -344,7 +344,7 @@ void board_init_f(ulong bootflag)
 
 #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF))
        /* reserve TLB table */
-       gd->arch.tlb_size = 4096 * 4;
+       gd->arch.tlb_size = PGTABLE_SIZE;
        addr -= gd->arch.tlb_size;
 
        /* round down to next 64 kB limit */
@@ -419,6 +419,7 @@ void board_init_f(ulong bootflag)
        }
 #endif
 
+#ifndef CONFIG_ARM64
        /* setup stackpointer for exeptions */
        gd->irq_sp = addr_sp;
 #ifdef CONFIG_USE_IRQ
@@ -431,6 +432,10 @@ void board_init_f(ulong bootflag)
 
        /* 8-byte alignment for ABI compliance */
        addr_sp &= ~0x07;
+#else  /* CONFIG_ARM64 */
+       /* 16-byte alignment for ABI compliance */
+       addr_sp &= ~0x0f;
+#endif /* CONFIG_ARM64 */
 #else
        addr_sp += 128; /* leave 32 words for abort-stack   */
        gd->irq_sp = addr_sp;
diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
index f476a89..b372d99 100644
--- a/arch/arm/lib/bootm.c
+++ b/arch/arm/lib/bootm.c
@@ -240,6 +240,21 @@ static void boot_prep_linux(bootm_headers_t *images)
 /* Subcommand: GO */
 static void boot_jump_linux(bootm_headers_t *images, int flag)
 {
+#ifdef CONFIG_ARM64
+       void (*kernel_entry)(void *fdt_addr);
+       int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
+
+       kernel_entry = (void (*)(void *fdt_addr))images->ep;
+
+       debug("## Transferring control to Linux (at address %lx)...\n",
+               (ulong) kernel_entry);
+       bootstage_mark(BOOTSTAGE_ID_RUN_OS);
+
+       announce_and_cleanup(fake);
+
+       if (!fake)
+               kernel_entry(images->ft_addr);
+#else
        unsigned long machid = gd->bd->bi_arch_number;
        char *s;
        void (*kernel_entry)(int zero, int arch, uint params);
@@ -266,6 +281,7 @@ static void boot_jump_linux(bootm_headers_t *images, int 
flag)
 
        if (!fake)
                kernel_entry(0, machid, r2);
+#endif
 }
 
 /* Main Entry point for arm bootm implementation
diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S
new file mode 100644
index 0000000..7756396
--- /dev/null
+++ b/arch/arm/lib/crt0_64.S
@@ -0,0 +1,113 @@
+/*
+ * crt0 - C-runtime startup Code for AArch64 U-Boot
+ *
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * (C) Copyright 2012
+ * Albert ARIBAUD <albert.u.b...@aribaud.net>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <config.h>
+#include <asm-offsets.h>
+#include <asm/macro.h>
+#include <linux/linkage.h>
+
+/*
+ * This file handles the target-independent stages of the U-Boot
+ * start-up where a C runtime environment is needed. Its entry point
+ * is _main and is branched into from the target's start.S file.
+ *
+ * _main execution sequence is:
+ *
+ * 1. Set up initial environment for calling board_init_f().
+ *    This environment only provides a stack and a place to store
+ *    the GD ('global data') structure, both located in some readily
+ *    available RAM (SRAM, locked cache...). In this context, VARIABLE
+ *    global data, initialized or not (BSS), are UNAVAILABLE; only
+ *    CONSTANT initialized data are available.
+ *
+ * 2. Call board_init_f(). This function prepares the hardware for
+ *    execution from system RAM (DRAM, DDR...) As system RAM may not
+ *    be available yet, board_init_f() must use the current GD to
+ *    store any data which must be passed on to later stages. These
+ *    data include the relocation destination, the future stack, and
+ *    the future GD location.
+ *
+ * (the following applies only to non-SPL builds)
+ *
+ * 3. Set up intermediate environment where the stack and GD are the
+ *    ones allocated by board_init_f() in system RAM, but BSS and
+ *    initialized non-const data are still not available.
+ *
+ * 4. Call relocate_code(). This function relocates U-Boot from its
+ *    current location into the relocation destination computed by
+ *    board_init_f().
+ *
+ * 5. Set up final environment for calling board_init_r(). This
+ *    environment has BSS (initialized to 0), initialized non-const
+ *    data (initialized to their intended value), and stack in system
+ *    RAM. GD has retained values set by board_init_f(). Some CPUs
+ *    have some work left to do at this point regarding memory, so
+ *    call c_runtime_cpu_setup.
+ *
+ * 6. Branch to board_init_r().
+ */
+
+ENTRY(_main)
+
+/*
+ * Set up initial C runtime environment and call board_init_f(0).
+ */
+       ldr     x0, =(CONFIG_SYS_INIT_SP_ADDR)
+       sub     x0, x0, #GD_SIZE        /* allocate one GD above SP */
+       bic     sp, x0, #0xf    /* 16-byte alignment for ABI compliance */
+       mov     x18, sp                 /* GD is above SP */
+       mov     x0, #0
+       bl      board_init_f
+
+/*
+ * Set up intermediate environment (new sp and gd) and call
+ * relocate_code(addr_moni). Trick here is that we'll return
+ * 'here' but relocated.
+ */
+       ldr     x0, [x18, #GD_START_ADDR_SP]    /* x0 <- gd->start_addr_sp */
+       bic     sp, x0, #0xf    /* 16-byte alignment for ABI compliance */
+       ldr     x18, [x18, #GD_BD]              /* x18 <- gd->bd */
+       sub     x18, x18, #GD_SIZE              /* new GD is below bd */
+
+       adr     lr, relocation_return
+       ldr     x9, [x18, #GD_RELOC_OFF]        /* x9 <- gd->reloc_off */
+       add     lr, lr, x9      /* new return address after relocation */
+       ldr     x0, [x18, #GD_RELOCADDR]        /* x0 <- gd->relocaddr */
+       b       relocate_code
+
+relocation_return:
+
+/*
+ * Set up final (full) environment
+ */
+       bl      c_runtime_cpu_setup             /* still call old routine */
+
+/*
+ * Clear BSS section
+ */
+       ldr     x0, =__bss_start                /* this is auto-relocated! */
+       ldr     x1, =__bss_end                  /* this is auto-relocated! */
+       mov     x2, #0
+clear_loop:
+       str     x2, [x0]
+       add     x0, x0, #8
+       cmp     x0, x1
+       b.lo    clear_loop
+
+       /* call board_init_r(gd_t *id, ulong dest_addr) */
+       mov     x0, x18                         /* gd_t */
+       ldr     x1, [x18, #GD_RELOCADDR]        /* dest_addr */
+       b       board_init_r                    /* PC relative jump */
+
+       /* NOTREACHED - board_init_r() does not return */
+
+ENDPROC(_main)
diff --git a/arch/arm/lib/interrupts_64.c b/arch/arm/lib/interrupts_64.c
new file mode 100644
index 0000000..b476722
--- /dev/null
+++ b/arch/arm/lib/interrupts_64.c
@@ -0,0 +1,120 @@
+/*
+ * (C) Copyright 2013
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <common.h>
+#include <linux/compiler.h>
+
+
+int interrupt_init(void)
+{
+       return 0;
+}
+
+void enable_interrupts(void)
+{
+       return;
+}
+
+int disable_interrupts(void)
+{
+       return 0;
+}
+
+void show_regs(struct pt_regs *regs)
+{
+       int i;
+
+       printf("ELR:     %lx\n", regs->elr);
+       printf("LR:      %lx\n", regs->regs[30]);
+       for (i = 0; i < 29; i += 2)
+               printf("x%-2d: %016lx x%-2d: %016lx\n",
+                      i, regs->regs[i], i+1, regs->regs[i+1]);
+       printf("\n");
+}
+
+/*
+ * do_bad_sync handles the impossible case in the Synchronous Abort vector.
+ */
+void do_bad_sync(struct pt_regs *pt_regs, unsigned int esr)
+{
+       printf("Bad mode in \"Synchronous Abort\" handler, esr 0x%08x\n", esr);
+       show_regs(pt_regs);
+       panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_irq handles the impossible case in the Irq vector.
+ */
+void do_bad_irq(struct pt_regs *pt_regs, unsigned int esr)
+{
+       printf("Bad mode in \"Irq\" handler, esr 0x%08x\n", esr);
+       show_regs(pt_regs);
+       panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_fiq handles the impossible case in the Fiq vector.
+ */
+void do_bad_fiq(struct pt_regs *pt_regs, unsigned int esr)
+{
+       printf("Bad mode in \"Fiq\" handler, esr 0x%08x\n", esr);
+       show_regs(pt_regs);
+       panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_bad_error handles the impossible case in the Error vector.
+ */
+void do_bad_error(struct pt_regs *pt_regs, unsigned int esr)
+{
+       printf("Bad mode in \"Error\" handler, esr 0x%08x\n", esr);
+       show_regs(pt_regs);
+       panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_sync handles the Synchronous Abort exception.
+ */
+void do_sync(struct pt_regs *pt_regs, unsigned int esr)
+{
+       printf("\"Synchronous Abort\" handler, esr 0x%08x\n", esr);
+       show_regs(pt_regs);
+       panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_irq handles the Irq exception.
+ */
+void do_irq(struct pt_regs *pt_regs, unsigned int esr)
+{
+       printf("\"Irq\" handler, esr 0x%08x\n", esr);
+       show_regs(pt_regs);
+       panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_fiq handles the Fiq exception.
+ */
+void do_fiq(struct pt_regs *pt_regs, unsigned int esr)
+{
+       printf("\"Fiq\" handler, esr 0x%08x\n", esr);
+       show_regs(pt_regs);
+       panic("Resetting CPU ...\n");
+}
+
+/*
+ * do_error handles the Error exception.
+ * Errors are more likely to be processor specific,
+ * it is defined with weak attribute and can be redefined
+ * in processor specific code.
+ */
+void __weak do_error(struct pt_regs *pt_regs, unsigned int esr)
+{
+       printf("\"Error\" handler, esr 0x%08x\n", esr);
+       show_regs(pt_regs);
+       panic("Resetting CPU ...\n");
+}
diff --git a/arch/arm/lib/relocate_64.S b/arch/arm/lib/relocate_64.S
new file mode 100644
index 0000000..7fba9e2
--- /dev/null
+++ b/arch/arm/lib/relocate_64.S
@@ -0,0 +1,58 @@
+/*
+ * relocate - common relocation function for AArch64 U-Boot
+ *
+ * (C) Copyright 2013
+ * Albert ARIBAUD <albert.u.b...@aribaud.net>
+ * David Feng <feng...@phytium.com.cn>
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <linux/linkage.h>
+
+/*
+ * void relocate_code (addr_moni)
+ *
+ * This function relocates the monitor code.
+ * x0 holds the destination address.
+ */
+ENTRY(relocate_code)
+       /*
+        * Copy the image [__image_copy_start, __image_copy_end) to x0
+        */
+       ldr     x1, =__image_copy_start /* x1 <- SRC &__image_copy_start */
+       subs    x9, x0, x1              /* x9 <- relocation offset (dest - src) */
+       b.eq    relocate_done           /* offset is zero: nothing to do */
+       ldr     x2, =__image_copy_end   /* x2 <- SRC &__image_copy_end */
+
+copy_loop:
+       ldp     x10, x11, [x1], #16     /* load 16 bytes from [x1], x1 += 16 */
+       stp     x10, x11, [x0], #16     /* store 16 bytes to [x0], x0 += 16 */
+       cmp     x1, x2                  /* until source end address [x2] */
+       b.lo    copy_loop               /* unsigned: loop while x1 < x2 */
+
+       /*
+        * Fix .rela.dyn relocations (24-byte entries: offset, info, addend)
+        */
+       ldr     x2, =__rel_dyn_start    /* x2 <- SRC &__rel_dyn_start */
+       ldr     x3, =__rel_dyn_end      /* x3 <- SRC &__rel_dyn_end */
+fixloop:
+       ldp     x0, x1, [x2], #16       /* (x0,x1) <- (r_offset, r_info) */
+       ldr     x4, [x2], #8            /* x4 <- r_addend */
+       and     x1, x1, #0xffffffff     /* low 32 bits of r_info: reloc type */
+       cmp     x1, #1027               /* R_AARCH64_RELATIVE (1027)? */
+       bne     fixnext                 /* other relocation types are skipped */
+
+       /* relative fix: store addend plus offset at dest location */
+       add     x0, x0, x9              /* x0 <- location in the relocated copy */
+       add     x4, x4, x9              /* x4 <- addend + relocation offset */
+       str     x4, [x0]
+fixnext:
+       cmp     x2, x3                  /* more entries before __rel_dyn_end? */
+       b.lo    fixloop
+
+relocate_done:
+       ret
+ENDPROC(relocate_code)
diff --git a/common/image.c b/common/image.c
index b0ae58f..4145354 100644
--- a/common/image.c
+++ b/common/image.c
@@ -81,6 +81,7 @@ static const table_entry_t uimage_arch[] = {
        {       IH_ARCH_NDS32,          "nds32",        "NDS32",        },
        {       IH_ARCH_OPENRISC,       "or1k",         "OpenRISC 1000",},
        {       IH_ARCH_SANDBOX,        "sandbox",      "Sandbox",      },
+       {       IH_ARCH_ARM64,          "arm64",        "AArch64",      },
        {       -1,                     "",             "",             },
 };
 
diff --git a/doc/README.arm64 b/doc/README.arm64
new file mode 100644
index 0000000..b7800cb
--- /dev/null
+++ b/doc/README.arm64
@@ -0,0 +1,45 @@
+U-boot for arm64
+
+Summary
+=======
+No arm64 hardware platform is available yet. U-Boot is run in
+simulation on the Foundation Model and Fast Model for ARMv8.
+
+Notes
+=====
+
+1. Currently, U-Boot can run at EL1 or EL2.
+
+2. U-Boot for arm64 is compiled with AArch64-gcc. AArch64-gcc
+   uses the rela relocation format; a tool (tools/relocate-rela) by Scott Wood
+   is used to encode the initial addend of rela into u-boot.bin. At run time,
+   U-Boot relocates itself again to its final destination.
+
+3. The FDT should be placed at a 2-megabyte boundary and within the first 512
+   megabytes from the start of the kernel image, so fdt_high must be
+   set accordingly.
+   Please refer to linux/Documentation/arm64/booting.txt for details.
+
+4. A spin-table is used to wake up secondary processors. One location
+   (or one location per processor) is defined to hold the kernel entry
+   point for secondary processors. This location must be accessible
+   and contain zero as soon as the secondary processors enter the
+   slave_cpu branch in start.S. The address of the location is
+   encoded in the cpu node of the DTS. The Linux kernel stores the
+   entry point of the secondary processors there and sends an event
+   to wake them up.
+   Please refer to linux/Documentation/arm64/booting.txt for details.
+
+5. Generic board is supported.
+
+6. CONFIG_ARM64, not CONFIG_ARMV8, is used to distinguish aarch64-specific
+   code from aarch32-specific code.
+
+Contributor
+===========
+   Tom Rini       <tr...@ti.com>
+   Scott Wood     <scottw...@freescale.com>
+   York Sun       <york...@freescale.com>
+   Simon Glass    <s...@chromium.org>
+   Sharma Bhupesh <bhupesh.sha...@freescale.com>
+   Rob Herring    <robherri...@gmail.com>
diff --git a/examples/standalone/stubs.c b/examples/standalone/stubs.c
index 8fb1765..fc5d7ef 100644
--- a/examples/standalone/stubs.c
+++ b/examples/standalone/stubs.c
@@ -39,6 +39,20 @@ gd_t *global_data;
 "      bctr\n"                         \
        : : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "r11");
 #elif defined(CONFIG_ARM)
+#ifdef CONFIG_ARM64
+/*
+ * x18 holds the pointer to the global_data, x9 is a call-clobbered
+ * register
+ */
+#define EXPORT_FUNC(x) \
+       asm volatile (                  \
+"      .globl " #x "\n"                \
+#x ":\n"                               \
+"      ldr     x9, [x18, %0]\n"                \
+"      ldr     x9, [x9, %1]\n"         \
+"      br      x9\n"           \
+       : : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "x9");
+#else
 /*
  * r8 holds the pointer to the global_data, ip is a call-clobbered
  * register
@@ -50,6 +64,7 @@ gd_t *global_data;
 "      ldr     ip, [r8, %0]\n"         \
 "      ldr     pc, [ip, %1]\n"         \
        : : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "ip");
+#endif
 #elif defined(CONFIG_MIPS)
 /*
  * k0 ($26) holds the pointer to the global_data; t9 ($25) is a call-
diff --git a/include/image.h b/include/image.h
index ee6eb8d..7de2bb2 100644
--- a/include/image.h
+++ b/include/image.h
@@ -156,6 +156,7 @@ struct lmb;
 #define IH_ARCH_SANDBOX                19      /* Sandbox architecture (test 
only) */
 #define IH_ARCH_NDS32          20      /* ANDES Technology - NDS32  */
 #define IH_ARCH_OPENRISC        21     /* OpenRISC 1000  */
+#define IH_ARCH_ARM64          22      /* ARM64        */
 
 /*
  * Image Types
-- 
1.7.9.5


_______________________________________________
U-Boot mailing list
U-Boot@lists.denx.de
http://lists.denx.de/mailman/listinfo/u-boot

Reply via email to