[osv-dev] [PATCH] aarch64: move kernel to 63rd GB of virtual memory
This patch modifies the aarch64 port to move the kernel from the 2nd to the 63rd GB of virtual memory. It also adjusts the early preboot and boot assembly to dynamically adjust the early phys/virt mapping tables to make it work regardless of where in physical memory the kernel and DTB are loaded. This allows us to use the same kernel binary on QEMU and Firecracker without having to adjust relevant variables in the makefile and rebuild it to accommodate the fact that each hypervisor would load loader.img in a different area of physical memory. Prior to this patch the kernel would be mapped 1:1 in the first 4 GB of phys/virt memory. In essence, this patch enhances preboot.S to dynamically identify the location of start_elf to jump to. Then it modifies boot.S to dynamically calculate the offset between where the kernel is located in virtual memory and where it is loaded in physical memory, and then adjust the 63rd GB of early boot mapping tables accordingly. Finally, it also adjusts the virt/phys and phys/virt translation functions in core/mmu.cc and other aspects in elf.cc and the makefile. 
After the patch the virtual memory layout would look like this in QEMU: vaddrpaddr size perm memattr name 800 8001 rwxp dev gic_dist 801 8011 rwxp dev gic_cpu 900 900 1000 rwxp dev pl011 901 901 1000 rwxp dev pl031 1000 1000 2eff rwxp dev pci_mem 3eff 3eff1 rwxp dev pci_io fc000 4000 84e000 rwxp normal kernel 401000 401000 1000 rwxp dev pci_cfg 8a00 a00 200 rwxp normal virtio_mmio_cfg 8a000200 a000200 200 rwxp normal virtio_mmio_cfg 8a000400 a000400 200 rwxp normal virtio_mmio_cfg 8a000600 a000600 200 rwxp normal virtio_mmio_cfg 8a000800 a000800 200 rwxp normal virtio_mmio_cfg 8a000a00 a000a00 200 rwxp normal virtio_mmio_cfg 8a000c00 a000c00 200 rwxp normal virtio_mmio_cfg 8a000e00 a000e00 200 rwxp normal virtio_mmio_cfg 80004084e000 4084e000 7f7b2000 rwxp normal main 90004084e000 4084e000 7f7b2000 rwxp normal page a0004084e000 4084e000 7f7b2000 rwxp normal mempool Fixes #1087 Signed-off-by: Waldemar Kozaczuk --- Makefile | 27 +--- arch/aarch64/arch-dtb.cc | 10 +-- arch/aarch64/arch-mmu.hh | 1 + arch/aarch64/arch-setup.cc | 10 ++- arch/aarch64/arch.hh | 2 +- arch/aarch64/boot.S| 138 ++--- arch/aarch64/loader.ld | 5 +- arch/aarch64/preboot.S | 20 -- core/elf.cc| 4 ++ core/mmu.cc| 18 + loader.cc | 16 - 11 files changed, 218 insertions(+), 33 deletions(-) diff --git a/Makefile b/Makefile index 82885016..c6dff37f 100644 --- a/Makefile +++ b/Makefile @@ -318,8 +318,12 @@ kernel-defines = -D_KERNEL $(source-dialects) $(cc-hide-flags) $(gc-flags) # To add something that will *not* be part of the main kernel, you can do: # # mydir/*.o EXTRA_FLAGS = +ifeq ($(arch),x64) EXTRA_FLAGS = -D__OSV_CORE__ -DOSV_KERNEL_BASE=$(kernel_base) -DOSV_KERNEL_VM_BASE=$(kernel_vm_base) \ -DOSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) -DOSV_LZKERNEL_BASE=$(lzkernel_base) +else +EXTRA_FLAGS = -D__OSV_CORE__ -DOSV_KERNEL_VM_BASE=$(kernel_vm_base) +endif EXTRA_LIBS = COMMON = $(autodepend) -g -Wall -Wno-pointer-arith $(CFLAGS_WERROR) -Wformat=0 -Wno-format-security \ -D __BSD_VISIBLE=1 -U 
_FORTIFY_SOURCE -fno-stack-protector $(INCLUDES) \ @@ -497,12 +501,13 @@ acpi = $(patsubst %.c, %.o, $(acpi-source)) $(acpi:%=$(out)/%): CFLAGS += -fno-strict-aliasing -Wno-stringop-truncation +kernel_vm_shift := $(shell printf "0x%X" $(shell expr $$(( $(kernel_vm_base) - $(kernel_base) )) )) + endif # x64 ifeq ($(arch),aarch64) -kernel_base := 0x4008 -kernel_vm_base := $(kernel_base) +kernel_vm_base := 0xfc008 #63GB app_local_exec_tls_size := 0x40 include $(libfdt_base)/Makefile.libfdt @@ -516,7 +521,7 @@ $(out)/preboot.bin: $(out)/preboot.elf $(call quiet, $(OBJCOPY) -O binary $^ $@, OBJCOPY $@) edata = $(shell readelf --syms $(out)/loader.elf | grep "\.edata" | awk '{print "0x" $$2}') -image_size = $$(( $(edata) - $(kernel_base) )) +image_size = $$(( $(edata) - $(kernel_vm_base) )) $(out)/loader.img: $(out)/preboot.bin $(out)/loader-stripped.elf $(call quiet, dd if=$(out)/preboot.bin of=$@ > /dev/null 2>&1, DD $@ preboot.bin) @@ -526,8 +531,6 @@ $(out)/loader.img: $(out)/preboot.bin $(out)/loader-stripped.elf endif # aarch64 -kernel_vm_shift := $(shell printf "0x%X" $(shell expr $$(( $(kernel_vm_base) - $(kernel_base) )) )) -
[osv-dev] [PATCH] aarch64: fix atomic_fetchadd_int and atomic_fetchadd_long
This patch fixes a subtle yet critical bug in the implementation of the atomic_fetchadd_* functions used in the bsd subtree of the OSv source code. This bug is a root cause of the issues #1189 and #1190 and affects stability of ZFS and the networking stack on aarch64. The atomic_fetchadd_*() functions are implemented in inlined assembly and provide the functionality to atomically add/subtract to/from a 4- or 8-byte long value in memory and return the old value of it before the update. The assembly, made of four instructions, in essence implements a simple loop to read a value from memory, add a specified delta, update memory with the new value and finally check if the update was successful. The pre-patch version of this code is almost correct and works properly if the atomic update is successful on the 1st attempt. However, it works incorrectly if that update fails and it needs to retry, which is quite rare. As an example, the generated machine code might look like this: 0x10119690 <+0>:ldaxr x2, [x0] 0x10119694 <+4>:add x1, x1, x2 0x10119698 <+8>:stlxr w3, x1, [x0] 0x1011969c <+12>: cbnz w3, 0x10119690 One can eventually notice that the x1 register holding a result (sum) to be updated to memory is re-used across iterations and would behave like an accumulator, which is wrong. We have to fix the inline assembly to make sure that a separate register is used for that. Rather than trying to fix the existing code, this patch updates both atomic_fetchadd_*() functions with copies of atomic_fetchadd_32 and atomic_fetchadd_64 from a recent enough version of the FreeBSD code - sys/arm64/include/atomic.h@119a353e3d9d45650e109600160caca173ac8a53 - tweaked to match the types of the val, tmp and res variables. Please note the FreeBSD version of the code uses ldxr/stxr instructions instead of the ldaxr/stlxr ones with acquire/release memory ordering semantics, which are excessive for atomic_fetchadd_*(). 
Fixes #1189 Fixes #1190 Signed-off-by: Waldemar Kozaczuk --- bsd/aarch64/machine/atomic.h | 46 ++-- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/bsd/aarch64/machine/atomic.h b/bsd/aarch64/machine/atomic.h index eb447d8e..55532585 100644 --- a/bsd/aarch64/machine/atomic.h +++ b/bsd/aarch64/machine/atomic.h @@ -83,28 +83,38 @@ int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src); static __inline u_int atomic_fetchadd_int(volatile u_int *p, u_int val) { -u_int result; -u_int status; -__asm __volatile("1: ldaxr %w0, %1 ; " - " add %w2, %w2, %w0 ; " - " stlxr %w3, %w2, %1 ; " - " cbnz %w3, 1b ; " - : "=&r"(result), "+Q"(*p), "+r"(val), "=&r"(status)); - -return result; +u_int tmp, ret; +u_int res; + +__asm __volatile( +"1: ldxr%w2, [%3] \n" +" add %w0, %w2, %w4 \n" +" stxr%w1, %w0, [%3] \n" +" cbnz%w1, 1b\n" +: "=&r"(tmp), "=&r"(res), "=&r"(ret) +: "r" (p), "r" (val) +: "memory" +); + +return ret; } static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long val) { -u_long result; -u_int status; -__asm __volatile("1: ldaxr %0, %1 ; " - " add %2, %2, %0 ; " - " stlxr %w3, %2, %1 ; " - " cbnz %w3, 1b ; " - : "=&r"(result), "+Q"(*p), "+r"(val), "=&r"(status)); - -return result; +u_long tmp, ret; +u_int res; + +__asm __volatile( +"1: ldxr%2, [%3] \n" +" add %0, %2, %4\n" +" stxr%w1, %0, [%3] \n" +" cbnz%w1, 1b \n" +: "=&r"(tmp), "=&r"(res), "=&r"(ret) +: "r" (p), "r" (val) +: "memory" +); + +return ret; } static __inline void atomic_store_rel_int(volatile u_int *p, u_int val) -- 2.27.0 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20220420181404.53392-1-jwkozaczuk%40gmail.com.