[osv-dev] [PATCH] aarch64: move kernel to 63rd GB of virtual memory

2022-04-20 Thread Waldemar Kozaczuk
This patch modifies the aarch64 port to move the kernel from
the 2nd to 63rd GB of virtual memory. It also adjusts the early preboot and boot
assembly to dynamically adjust the early phys/virt mapping tables to make
it work regardless of where in physical memory the kernel and DTB are loaded.
This allows us to use the same kernel binary on QEMU and Firecracker without
having to adjust relevant variables in the makefile and rebuild it to accommodate
the fact that each hypervisor would load loader.img in a different area
of physical memory. Prior to this patch the kernel would be mapped 1:1
in the first 4 GB of phys/virt memory.

In essence, this patch enhances the preboot.S to dynamically identify
location of start_elf to jump to. Then it modifies boot.S to dynamically
calculate the offset between where kernel is located in virtual memory and where
it is loaded in physical memory and then adjust the 63rd GB of early boot
mapping tables accordingly. Finally it also adjusts the virt/phys and phys/virt
translation functions in core/mmu.cc and other aspects in elf.cc and makefile.

After the patch the virtual memory layout would look like this in QEMU:

   vaddr paddr size perm memattr name
 800  8001 rwxp dev gic_dist
 801  8011 rwxp dev gic_cpu
 900  900 1000 rwxp dev pl011
 901  901 1000 rwxp dev pl031
1000 1000 2eff rwxp dev pci_mem
3eff 3eff1 rwxp dev pci_io
   fc000 4000   84e000 rwxp  normal kernel
  401000   401000 1000 rwxp dev pci_cfg
8a00  a00  200 rwxp  normal virtio_mmio_cfg
8a000200  a000200  200 rwxp  normal virtio_mmio_cfg
8a000400  a000400  200 rwxp  normal virtio_mmio_cfg
8a000600  a000600  200 rwxp  normal virtio_mmio_cfg
8a000800  a000800  200 rwxp  normal virtio_mmio_cfg
8a000a00  a000a00  200 rwxp  normal virtio_mmio_cfg
8a000c00  a000c00  200 rwxp  normal virtio_mmio_cfg
8a000e00  a000e00  200 rwxp  normal virtio_mmio_cfg
80004084e000 4084e000 7f7b2000 rwxp  normal main
90004084e000 4084e000 7f7b2000 rwxp  normal page
a0004084e000 4084e000 7f7b2000 rwxp  normal mempool

Fixes #1087

Signed-off-by: Waldemar Kozaczuk 
---
 Makefile   |  27 +---
 arch/aarch64/arch-dtb.cc   |  10 +--
 arch/aarch64/arch-mmu.hh   |   1 +
 arch/aarch64/arch-setup.cc |  10 ++-
 arch/aarch64/arch.hh   |   2 +-
 arch/aarch64/boot.S| 138 ++---
 arch/aarch64/loader.ld |   5 +-
 arch/aarch64/preboot.S |  20 --
 core/elf.cc|   4 ++
 core/mmu.cc|  18 +
 loader.cc  |  16 -
 11 files changed, 218 insertions(+), 33 deletions(-)

diff --git a/Makefile b/Makefile
index 82885016..c6dff37f 100644
--- a/Makefile
+++ b/Makefile
@@ -318,8 +318,12 @@ kernel-defines = -D_KERNEL $(source-dialects) 
$(cc-hide-flags) $(gc-flags)
 # To add something that will *not* be part of the main kernel, you can do:
 #
 #   mydir/*.o EXTRA_FLAGS = 
+ifeq ($(arch),x64)
 EXTRA_FLAGS = -D__OSV_CORE__ -DOSV_KERNEL_BASE=$(kernel_base) 
-DOSV_KERNEL_VM_BASE=$(kernel_vm_base) \
-DOSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) 
-DOSV_LZKERNEL_BASE=$(lzkernel_base)
+else
+EXTRA_FLAGS = -D__OSV_CORE__ -DOSV_KERNEL_VM_BASE=$(kernel_vm_base)
+endif
 EXTRA_LIBS =
 COMMON = $(autodepend) -g -Wall -Wno-pointer-arith $(CFLAGS_WERROR) -Wformat=0 
-Wno-format-security \
-D __BSD_VISIBLE=1 -U _FORTIFY_SOURCE -fno-stack-protector $(INCLUDES) \
@@ -497,12 +501,13 @@ acpi = $(patsubst %.c, %.o, $(acpi-source))
 
 $(acpi:%=$(out)/%): CFLAGS += -fno-strict-aliasing -Wno-stringop-truncation
 
+kernel_vm_shift := $(shell printf "0x%X" $(shell expr $$(( $(kernel_vm_base) - 
$(kernel_base) )) ))
+
 endif # x64
 
 ifeq ($(arch),aarch64)
 
-kernel_base := 0x4008
-kernel_vm_base := $(kernel_base)
+kernel_vm_base := 0xfc008 #63GB
 app_local_exec_tls_size := 0x40
 
 include $(libfdt_base)/Makefile.libfdt
@@ -516,7 +521,7 @@ $(out)/preboot.bin: $(out)/preboot.elf
$(call quiet, $(OBJCOPY) -O binary $^ $@, OBJCOPY $@)
 
 edata = $(shell readelf --syms $(out)/loader.elf | grep "\.edata" | awk 
'{print "0x" $$2}')
-image_size = $$(( $(edata) - $(kernel_base) ))
+image_size = $$(( $(edata) - $(kernel_vm_base) ))
 
 $(out)/loader.img: $(out)/preboot.bin $(out)/loader-stripped.elf
$(call quiet, dd if=$(out)/preboot.bin of=$@ > /dev/null 2>&1, DD $@ 
preboot.bin)
@@ -526,8 +531,6 @@ $(out)/loader.img: $(out)/preboot.bin 
$(out)/loader-stripped.elf
 
 endif # aarch64
 
-kernel_vm_shift := $(shell printf "0x%X" $(shell expr $$(( $(kernel_vm_base) - 
$(kernel_base) )) ))
-

[osv-dev] [PATCH] aarch64: fix atomic_fetchadd_int and atomic_fetchadd_long

2022-04-20 Thread Waldemar Kozaczuk
This patch fixes a subtle yet critical bug in the implementation of the
atomic_fetchadd_* functions used in the bsd subtree of OSv source code.
This bug is a root cause of the issues #1189 and #1190 and affects stability
of ZFS and networking stack on aarch64.

The atomic_fetchadd_*() are implemented in inlined assembly and provide the
functionality to atomically add/subtract to/from a 4- or 8-bytes long value
in memory and return old value of it before update. The assembly made of
four instructions in essence implements simple loop to read a value from memory,
add a specified delta, update memory with new value and finally check if the
update was successful. The pre-patch version of this code is almost correct
and works properly if the atomic update is successful in the 1st attempt.
However it works incorrectly if that update fails and it needs to retry
which is quite rare.

As an example the generated machine code might look like this:

0x10119690 <+0>:    ldaxr   x2, [x0]
0x10119694 <+4>:    add     x1, x1, x2
0x10119698 <+8>:    stlxr   w3, x1, [x0]
0x1011969c <+12>:   cbnz    w3, 0x10119690


One can eventually notice that the x1 register holding a result (sum) to be
updated to memory is re-used across iterations and would behave like an
accumulator which is wrong. We have to fix the inline assembly to make
sure that a separate register is used for that. Rather than trying to fix
existing code, this patch updates both atomic_fetchadd_*() functions
with the copies of atomic_fetchadd_32 and atomic_fetchadd_64 from a recent
enough version of the FreeBSD code - 
sys/arm64/include/atomic.h@119a353e3d9d45650e109600160caca173ac8a53
and tweaked to match the types of val, tmp and res variables.

Please note the FreeBSD version of the code uses ldxr/stxr instructions
instead of the ldaxr/stlxr ones, which have acquire/release memory ordering
semantics that are excessive for atomic_fetchadd_*().

Fixes #1189
Fixes #1190

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/aarch64/machine/atomic.h | 46 ++--
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/bsd/aarch64/machine/atomic.h b/bsd/aarch64/machine/atomic.h
index eb447d8e..55532585 100644
--- a/bsd/aarch64/machine/atomic.h
+++ b/bsd/aarch64/machine/atomic.h
@@ -83,28 +83,38 @@ int atomic_cmpset_long(volatile u_long *dst, u_long expect, 
u_long src);
 
 static __inline u_int atomic_fetchadd_int(volatile u_int *p, u_int val)
 {
-u_int result;
-u_int status;
-__asm __volatile("1: ldaxr %w0, %1 ; "
- "   add   %w2, %w2, %w0 ; "
- "   stlxr %w3, %w2, %1 ; "
- "   cbnz  %w3, 1b ; "
- : "=&r"(result), "+Q"(*p), "+r"(val), "=&r"(status));
-
-return result;
+u_int tmp, ret;
+u_int res;
+
+__asm __volatile(
+"1: ldxr%w2, [%3]  \n"
+"   add %w0, %w2, %w4  \n"
+"   stxr%w1, %w0, [%3] \n"
+"   cbnz%w1, 1b\n"
+: "=&r"(tmp), "=&r"(res), "=&r"(ret)
+: "r" (p), "r" (val)
+: "memory"
+);
+
+return ret;
 }
 
 static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long val)
 {
-u_long result;
-u_int status;
-__asm __volatile("1: ldaxr %0, %1 ; "
- "   add   %2, %2, %0 ; "
- "   stlxr %w3, %2, %1 ; "
- "   cbnz  %w3, 1b ; "
- : "=&r"(result), "+Q"(*p), "+r"(val), "=&r"(status));
-
-return result;
+u_long tmp, ret;
+u_int res;
+
+__asm __volatile(
+"1: ldxr%2, [%3]  \n"
+"   add %0, %2, %4\n"
+"   stxr%w1, %0, [%3] \n"
+"   cbnz%w1, 1b   \n"
+: "=&r"(tmp), "=&r"(res), "=&r"(ret)
+: "r" (p), "r" (val)
+: "memory"
+);
+
+return ret;
 }
 
 static __inline void atomic_store_rel_int(volatile u_int *p, u_int val)
-- 
2.27.0

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220420181404.53392-1-jwkozaczuk%40gmail.com.