The x86 vector instruction set is extremely irregular. With newer
editions, Intel has filled in some of the blanks. However, we don't
get many 64-bit operations until SSE4.2, introduced in 2009.
The subsequent edition was AVX1, introduced in 2011, which added
three-operand addressing and changed how all instructions are
encoded.
Given the relatively narrow two-year window between what is possible to
support and what is desirable to support, and to vastly simplify code
maintenance, I am only planning to support AVX1 and later CPUs.
Signed-off-by: Richard Henderson
---
tcg/i386/tcg-target.h | 36 ++-
tcg/i386/tcg-target.inc.c | 561 ++
2 files changed, 546 insertions(+), 51 deletions(-)
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index b89dababf4..f9d3fc4a93 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -30,10 +30,10 @@
#ifdef __x86_64__
# define TCG_TARGET_REG_BITS 64
-# define TCG_TARGET_NB_REGS 16
+# define TCG_TARGET_NB_REGS 32
#else
# define TCG_TARGET_REG_BITS 32
-# define TCG_TARGET_NB_REGS 8
+# define TCG_TARGET_NB_REGS 24
#endif
typedef enum {
@@ -56,6 +56,26 @@ typedef enum {
TCG_REG_R13,
TCG_REG_R14,
TCG_REG_R15,
+
+TCG_REG_XMM0,
+TCG_REG_XMM1,
+TCG_REG_XMM2,
+TCG_REG_XMM3,
+TCG_REG_XMM4,
+TCG_REG_XMM5,
+TCG_REG_XMM6,
+TCG_REG_XMM7,
+
+/* 64-bit registers; likewise always define. */
+TCG_REG_XMM8,
+TCG_REG_XMM9,
+TCG_REG_XMM10,
+TCG_REG_XMM11,
+TCG_REG_XMM12,
+TCG_REG_XMM13,
+TCG_REG_XMM14,
+TCG_REG_XMM15,
+
TCG_REG_RAX = TCG_REG_EAX,
TCG_REG_RCX = TCG_REG_ECX,
TCG_REG_RDX = TCG_REG_EDX,
@@ -77,6 +97,8 @@ typedef enum {
extern bool have_bmi1;
extern bool have_popcnt;
+extern bool have_avx1;
+extern bool have_avx2;
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
@@ -146,6 +168,16 @@ extern bool have_popcnt;
#define TCG_TARGET_HAS_mulsh_i64 0
#endif
+/* We do not support older SSE systems, only beginning with AVX1. */
+#define TCG_TARGET_HAS_v64 have_avx1
+#define TCG_TARGET_HAS_v128 have_avx1
+#define TCG_TARGET_HAS_v256 have_avx2
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_not_vec 0
+#define TCG_TARGET_HAS_neg_vec 0
+
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
(((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
((ofs) == 0 && (len) == 16))
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 63d27f10e7..e9a4d92598 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -28,9 +28,14 @@
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
"%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
-"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
"%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+#endif
+"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",
+#if TCG_TARGET_REG_BITS == 64
+"%xmm8", "%xmm9", "%xmm10", "%xmm11",
+"%xmm12", "%xmm13", "%xmm14", "%xmm15",
#endif
};
#endif
@@ -60,6 +65,28 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_ECX,
TCG_REG_EDX,
TCG_REG_EAX,
+#endif
+TCG_REG_XMM0,
+TCG_REG_XMM1,
+TCG_REG_XMM2,
+TCG_REG_XMM3,
+TCG_REG_XMM4,
+TCG_REG_XMM5,
+#ifndef _WIN64
+/* The Win64 ABI treats xmm6-xmm15 as callee-saved, and we do not save
+ any of them. Therefore only allow xmm0-xmm5 to be allocated. */
+TCG_REG_XMM6,
+TCG_REG_XMM7,
+#if TCG_TARGET_REG_BITS == 64
+TCG_REG_XMM8,
+TCG_REG_XMM9,
+TCG_REG_XMM10,
+TCG_REG_XMM11,
+TCG_REG_XMM12,
+TCG_REG_XMM13,
+TCG_REG_XMM14,
+TCG_REG_XMM15,
+#endif
#endif
};
@@ -94,7 +121,7 @@ static const int tcg_target_call_oarg_regs[] = {
#define TCG_CT_CONST_I32 0x400
#define TCG_CT_CONST_WSZ 0x800
-/* Registers used with L constraint, which are the first argument
+/* Registers used with L constraint, which are the first argument
registers on x86_64, and two random call clobbered registers on
i386. */
#if TCG_TARGET_REG_BITS == 64
@@ -125,6 +152,8 @@ static bool have_cmov;
it there. Therefore we always define the variable. */
bool have_bmi1;
bool have_popcnt;
+bool have_avx1;
+bool have_avx2;
#ifdef CONFIG_CPUID_H
static bool have_movbe;
@@ -148,6 +177,8 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
if (value != (int32_t)value) {
tcg_abort();
}
+/* FALLTHRU */
+case R_386_32:
tcg_patch32(code_ptr, value);
break;
case R_386_PC8:
@@ -162,6 +193,14 @@ static void