Re: [RFC v5 02/10] build: Implement libnative library and the build machinery for libnative

2023-08-25 Thread Richard Henderson

On 8/25/23 03:20, Yeqi Fu wrote:

This commit implements a shared library, where native functions are
rewritten as special instructions. At runtime, user programs load
the shared library, and special instructions are executed when
native functions are called.

Signed-off-by: Yeqi Fu 

...

diff --git a/common-user/native/libnative.S b/common-user/native/libnative.S
new file mode 100644
index 0000000000..3692eaa3cf
--- /dev/null
+++ b/common-user/native/libnative.S
@@ -0,0 +1,69 @@
+#if defined(i386) || defined(x86_64)
+/*
+ * An unused instruction is utilized to mark a native call.
+ */
+#define __SPECIAL_INSTR .byte 0x0f, 0xff;
+#define __RET_INSTR ret;
+#endif
+
+#if defined(arm) || defined(aarch64)
+/*
+ * HLT is an invalid instruction for userspace programs,
+ * and is used to mark a native call.
+ */
+#define __SPECIAL_INSTR hlt 0xffff;
+#if defined(aarch64)
+#define __RET_INSTR ret;
+#else
+#define __RET_INSTR bx lr;
+#endif
+#endif
+
+
+#if defined(mips) || defined(mips64)
+/*
+ * The syscall instruction contains 20 unused bits, which are typically
+ * set to 0. These bits can be used to store non-zero data,
+ * distinguishing them from a regular syscall instruction.
+ */
+#define __SPECIAL_INSTR syscall 0xffff;
+#define __RET_INSTR jr $ra;
+#endif
+
+/* Symbols of native functions */
+.section .data
+sym_memset:  .asciz "memset"
+sym_memcpy:  .asciz "memcpy"
+sym_strncpy:  .asciz "strncpy"
+sym_memcmp:  .asciz "memcmp"
+sym_strncmp:  .asciz "strncmp"
+sym_strcpy:  .asciz "strcpy"
+sym_strcat:  .asciz "strcat"
+sym_strcmp:  .asciz "strcmp"
+
+.macro define_function name
+\name:
+#if defined(x86_64) || defined(aarch64)
+__SPECIAL_INSTR
+.quad sym_\name
+__RET_INSTR
+#elif defined(mips64)
+.align 4
+__SPECIAL_INSTR
+.quad sym_\name
+__RET_INSTR
+#elif defined(i386) || defined(mips) || defined(arm)
+__SPECIAL_INSTR
+.long sym_\name
+__RET_INSTR
+#endif
+.endm
+
+define_function memcpy
+define_function strncpy
+define_function memset
+define_function memcmp
+define_function strncmp
+define_function strcpy
+define_function strcat
+define_function strcmp


This cannot possibly work, since none of the symbols are marked .globl, and are therefore 
not exported from your libnative.so.


Furthermore, you placed your strings in .data, but then failed to change back to .text, so 
none of the instructions are in an executable load segment.


I conclude that your testing succeeded only because no library calls were replaced.
This is not sufficient testing.

In review of previous versions, I have mentioned that the x86 UD0 instruction has more 
bytes than simply 0x0f 0xff -- at minimum 3 -- and moreover can be used in the assembler 
to produce pc-relative values.


We can clean up the assembly as follows.


r~


-


/*
 * special_instr sym
 *
 * Emit the instruction that marks a native call, together with a
 * pc-relative reference to the string located at the label given in sym.
 *
 *   i386:         UD0 whose displacement is sym relative to the end of
 *                 the instruction (local label 1).
 *   x86_64:       UD0 with a %rip-relative memory operand.
 *   arm, aarch64: HLT, which is invalid for userspace programs, followed
 *                 by a word holding the offset from that word to sym.
 *   mips:         SYSCALL with a non-zero code field, followed by a word
 *                 holding the offset from that word to sym.
 *
 * Any other target is rejected at preprocessing time.
 *
 * NOTE(review): the archived copy of this listing shows the HLT and
 * SYSCALL immediates as a bare "0x"; 0xffff is restored here on the
 * assumption that the archive dropped the digits. Confirm against the
 * translator side that recognizes these instructions.
 */
.macro special_instr sym
#if defined(__i386__)
        ud0     \sym-1f, %eax; 1:
#elif defined(__x86_64__)
        ud0     \sym(%rip), %eax
#elif defined(__arm__) || defined(__aarch64__)
        hlt     0xffff
1:      .word   \sym - 1b
#elif defined(__mips__)
        syscall 0xffff
1:      .word   \sym - 1b
#else
# error
#endif
.endm

/*
 * ret_instr
 *
 * Emit the return-to-caller instruction for the target being assembled.
 * Unsupported targets are rejected at preprocessing time.
 */
.macro ret_instr
#if defined(__arm__)
        bx      lr
#elif defined(__mips__)
        jr      $ra
#elif defined(__aarch64__) || defined(__x86_64__) || defined(__i386__)
        ret
#else
# error
#endif
.endm

/* Symbols of native functions */

/*
 * define_function name
 *
 * Emit, in .text, a global function symbol with the given name whose
 * body is the native-call marker followed by a return. The function
 * name itself is stored as a NUL-terminated string in .rodata at local
 * label 9, which is the label handed to special_instr.
 *
 * The macro leaves .rodata as the current section; each expansion
 * switches back to .text on entry.
 */
.macro define_function name
        .text
        .globl  \name
        .type   \name, %function
\name:
        special_instr 9f
        ret_instr
        .size   \name, . - \name

        .section .rodata
9:      .asciz  "\name"
.endm

/* Expand define_function once per native function, in the listed order. */
        .irp    fn, memcmp, memcpy, memset, strcat, strcmp, strcpy, strncmp, strncpy
        define_function \fn
        .endr



[RFC v5 02/10] build: Implement libnative library and the build machinery for libnative

2023-08-25 Thread Yeqi Fu
This commit implements a shared library, where native functions are
rewritten as special instructions. At runtime, user programs load
the shared library, and special instructions are executed when
native functions are called.

Signed-off-by: Yeqi Fu 
---
 Makefile|  2 +
 common-user/native/Makefile.include |  9 
 common-user/native/Makefile.target  | 22 +
 common-user/native/libnative.S  | 69 +
 configure   | 39 
 5 files changed, 141 insertions(+)
 create mode 100644 common-user/native/Makefile.include
 create mode 100644 common-user/native/Makefile.target
 create mode 100644 common-user/native/libnative.S

diff --git a/Makefile b/Makefile
index 5d48dfac18..6f6147b40f 100644
--- a/Makefile
+++ b/Makefile
@@ -182,6 +182,8 @@ SUBDIR_MAKEFLAGS=$(if $(V),,--no-print-directory --quiet)
 
 include $(SRC_PATH)/tests/Makefile.include
 
+include $(SRC_PATH)/common-user/native/Makefile.include
+
 all: recurse-all
 
 ROMS_RULES=$(foreach t, all clean distclean, $(addsuffix /$(t), $(ROMS)))
diff --git a/common-user/native/Makefile.include b/common-user/native/Makefile.include
new file mode 100644
index 0000000000..40d20bcd4c
--- /dev/null
+++ b/common-user/native/Makefile.include
@@ -0,0 +1,9 @@
+.PHONY: build-native
+build-native: $(NATIVE_TARGETS:%=build-native-library-%)
+$(NATIVE_TARGETS:%=build-native-library-%): build-native-library-%:
+	$(call quiet-command, \
+	$(MAKE) -C common-user/native/$* $(SUBDIR_MAKEFLAGS), \
+	"BUILD","$* native library")
+# endif
+
+all: build-native
diff --git a/common-user/native/Makefile.target b/common-user/native/Makefile.target
new file mode 100644
index 0000000000..65d90102e2
--- /dev/null
+++ b/common-user/native/Makefile.target
@@ -0,0 +1,22 @@
+# -*- Mode: makefile -*-
+#
+# Library for native calls
+#
+
+all:
+-include ../../../config-host.mak
+-include config-target.mak
+
+CFLAGS+=-shared -D $(TARGET_NAME)
+LDFLAGS+=
+
+SRC = $(SRC_PATH)/common-user/native/libnative.S
+LIBNATIVE = libnative.so
+
+all: $(LIBNATIVE)
+
+$(LIBNATIVE): $(SRC)
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(EXTRA_NATIVE_CALL_FLAGS) $< -o $@ $(LDFLAGS)
+
+clean:
+	rm -f $(LIBNATIVE)
diff --git a/common-user/native/libnative.S b/common-user/native/libnative.S
new file mode 100644
index 0000000000..3692eaa3cf
--- /dev/null
+++ b/common-user/native/libnative.S
@@ -0,0 +1,69 @@
+#if defined(i386) || defined(x86_64)
+/*
+ * An unused instruction is utilized to mark a native call.
+ */
+#define __SPECIAL_INSTR .byte 0x0f, 0xff;
+#define __RET_INSTR ret;
+#endif
+
+#if defined(arm) || defined(aarch64)
+/*
+ * HLT is an invalid instruction for userspace programs,
+ * and is used to mark a native call.
+ */
+#define __SPECIAL_INSTR hlt 0xffff;
+#if defined(aarch64)
+#define __RET_INSTR ret;
+#else
+#define __RET_INSTR bx lr;
+#endif
+#endif
+
+
+#if defined(mips) || defined(mips64)
+/*
+ * The syscall instruction contains 20 unused bits, which are typically
+ * set to 0. These bits can be used to store non-zero data,
+ * distinguishing them from a regular syscall instruction.
+ */
+#define __SPECIAL_INSTR syscall 0xffff;
+#define __RET_INSTR jr $ra;
+#endif
+
+/* Symbols of native functions */
+.section .data
+sym_memset:  .asciz "memset"
+sym_memcpy:  .asciz "memcpy"
+sym_strncpy:  .asciz "strncpy"
+sym_memcmp:  .asciz "memcmp"
+sym_strncmp:  .asciz "strncmp"
+sym_strcpy:  .asciz "strcpy"
+sym_strcat:  .asciz "strcat"
+sym_strcmp:  .asciz "strcmp"
+
+.macro define_function name
+\name:
+#if defined(x86_64) || defined(aarch64)
+__SPECIAL_INSTR
+.quad sym_\name
+__RET_INSTR
+#elif defined(mips64)
+.align 4
+__SPECIAL_INSTR
+.quad sym_\name
+__RET_INSTR
+#elif defined(i386) || defined(mips) || defined(arm)
+__SPECIAL_INSTR
+.long sym_\name
+__RET_INSTR
+#endif
+.endm
+
+define_function memcpy
+define_function strncpy
+define_function memset
+define_function memcmp
+define_function strncmp
+define_function strcpy
+define_function strcat
+define_function strcmp
diff --git a/configure b/configure
index 7a1e463d9c..de533b27a2 100755
--- a/configure
+++ b/configure
@@ -1826,6 +1826,45 @@ if test "$tcg" = "enabled"; then
 fi
 )
 
+# common-user/native configuration
+(mkdir -p common-user/native
+
+native_targets=
+for target in $target_list; do
+  case $target in
+*-softmmu)
+continue
+;;
+  esac
+
+  # native call is only supported on these architectures
+  arch=${target%%-*}
+  config_target_mak=common-user/native/${target}/config-target.mak
+  case $arch in
+i386|x86_64|arm|aarch64|mips|mips64)
+  if test -f cross-build/${target}/config-target.mak; then
+mkdir -p "common-user/native/${target}"
+ln -srf cross-build/${target}/config-target.mak "$config_target_mak"
+if test $arch = arm; then
+  echo "EXTRA_NATIVE_CALL_FLAGS=-marm" >> "$config_target_mak"
+fi
+if test $arch =