Module Name: src
Committed By: maxv
Date: Thu Mar 7 15:47:34 UTC 2019
Modified Files:
src/lib/libnvmm: libnvmm_x86.c
Log Message:
Micro optimizations:
- Compress x86_rexpref, x86_regmodrm, x86_opcode and x86_instr.
- Cache-align the register, opcode and group tables.
- Modify the opcode tables to have 256 entries indexed directly by the opcode byte, and avoid a linear search on each lookup.
To generate a diff of this commit:
cvs rdiff -u -r1.26 -r1.27 src/lib/libnvmm/libnvmm_x86.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/lib/libnvmm/libnvmm_x86.c
diff -u src/lib/libnvmm/libnvmm_x86.c:1.26 src/lib/libnvmm/libnvmm_x86.c:1.27
--- src/lib/libnvmm/libnvmm_x86.c:1.26 Tue Feb 26 12:23:12 2019
+++ src/lib/libnvmm/libnvmm_x86.c Thu Mar 7 15:47:34 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: libnvmm_x86.c,v 1.26 2019/02/26 12:23:12 maxv Exp $ */
+/* $NetBSD: libnvmm_x86.c,v 1.27 2019/03/07 15:47:34 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -46,6 +46,7 @@
#include "nvmm.h"
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define __cacheline_aligned __attribute__((__aligned__(64)))
#include <x86/specialreg.h>
@@ -904,15 +905,15 @@ struct x86_legpref {
bool adr_ovr:1;
bool rep:1;
bool repn:1;
- int seg;
+ int8_t seg;
};
struct x86_rexpref {
- bool present;
- bool w;
- bool r;
- bool x;
- bool b;
+ bool b:1;
+ bool x:1;
+ bool r:1;
+ bool w:1;
+ bool present:1;
};
struct x86_reg {
@@ -962,10 +963,9 @@ enum REGMODRM__Rm {
};
struct x86_regmodrm {
- bool present;
- enum REGMODRM__Mod mod;
- enum REGMODRM__Reg reg;
- enum REGMODRM__Rm rm;
+ uint8_t mod:2;
+ uint8_t reg:3;
+ uint8_t rm:3;
};
struct x86_immediate {
@@ -999,22 +999,20 @@ struct x86_store {
};
struct x86_instr {
- size_t len;
+ uint8_t len;
struct x86_legpref legpref;
struct x86_rexpref rexpref;
- size_t operand_size;
- size_t address_size;
- uint64_t zeroextend_mask;
-
struct x86_regmodrm regmodrm;
+ uint8_t operand_size;
+ uint8_t address_size;
+ uint64_t zeroextend_mask;
const struct x86_opcode *opcode;
+ const struct x86_emul *emul;
struct x86_store src;
struct x86_store dst;
struct x86_store *strm;
-
- const struct x86_emul *emul;
};
struct x86_decode_fsm {
@@ -1030,22 +1028,21 @@ struct x86_decode_fsm {
};
struct x86_opcode {
- uint8_t byte;
- bool regmodrm;
- bool regtorm;
- bool dmo;
- bool todmo;
- bool movs;
- bool stos;
- bool lods;
- bool szoverride;
- int defsize;
- int allsize;
- bool group1;
- bool group3;
- bool group11;
- bool immediate;
- int flags;
+ bool valid:1;
+ bool regmodrm:1;
+ bool regtorm:1;
+ bool dmo:1;
+ bool todmo:1;
+ bool movs:1;
+ bool stos:1;
+ bool lods:1;
+ bool szoverride:1;
+ bool group1:1;
+ bool group3:1;
+ bool group11:1;
+ bool immediate:1;
+ uint8_t defsize;
+ uint8_t flags;
const struct x86_emul *emul;
};
@@ -1062,59 +1059,56 @@ struct x86_group_entry {
#define FLAG_immz 0x02
#define FLAG_ze 0x04
-static const struct x86_group_entry group1[8] = {
+static const struct x86_group_entry group1[8] __cacheline_aligned = {
[1] = { .emul = &x86_emul_or },
[4] = { .emul = &x86_emul_and },
[6] = { .emul = &x86_emul_xor },
[7] = { .emul = &x86_emul_cmp }
};
-static const struct x86_group_entry group3[8] = {
+static const struct x86_group_entry group3[8] __cacheline_aligned = {
[0] = { .emul = &x86_emul_test },
[1] = { .emul = &x86_emul_test }
};
-static const struct x86_group_entry group11[8] = {
+static const struct x86_group_entry group11[8] __cacheline_aligned = {
[0] = { .emul = &x86_emul_mov }
};
-static const struct x86_opcode primary_opcode_table[] = {
+static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
/*
* Group1
*/
- {
+ [0x80] = {
/* Eb, Ib */
- .byte = 0x80,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.group1 = true,
.immediate = true,
.emul = NULL /* group1 */
},
- {
+ [0x81] = {
/* Ev, Iz */
- .byte = 0x81,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.group1 = true,
.immediate = true,
.flags = FLAG_immz,
.emul = NULL /* group1 */
},
- {
+ [0x83] = {
/* Ev, Ib */
- .byte = 0x83,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.group1 = true,
.immediate = true,
.flags = FLAG_imm8,
@@ -1124,26 +1118,24 @@ static const struct x86_opcode primary_o
/*
* Group3
*/
- {
+ [0xF6] = {
/* Eb, Ib */
- .byte = 0xF6,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.group3 = true,
.immediate = true,
.emul = NULL /* group3 */
},
- {
+ [0xF7] = {
/* Ev, Iz */
- .byte = 0xF7,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.group3 = true,
.immediate = true,
.flags = FLAG_immz,
@@ -1153,26 +1145,24 @@ static const struct x86_opcode primary_o
/*
* Group11
*/
- {
+ [0xC6] = {
/* Eb, Ib */
- .byte = 0xC6,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.group11 = true,
.immediate = true,
.emul = NULL /* group11 */
},
- {
+ [0xC7] = {
/* Ev, Iz */
- .byte = 0xC7,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.group11 = true,
.immediate = true,
.flags = FLAG_immz,
@@ -1182,353 +1172,321 @@ static const struct x86_opcode primary_o
/*
* OR
*/
- {
+ [0x08] = {
/* Eb, Gb */
- .byte = 0x08,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_or
},
- {
+ [0x09] = {
/* Ev, Gv */
- .byte = 0x09,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_or
},
- {
+ [0x0A] = {
/* Gb, Eb */
- .byte = 0x0A,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_or
},
- {
+ [0x0B] = {
/* Gv, Ev */
- .byte = 0x0B,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_or
},
/*
* AND
*/
- {
+ [0x20] = {
/* Eb, Gb */
- .byte = 0x20,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_and
},
- {
+ [0x21] = {
/* Ev, Gv */
- .byte = 0x21,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_and
},
- {
+ [0x22] = {
/* Gb, Eb */
- .byte = 0x22,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_and
},
- {
+ [0x23] = {
/* Gv, Ev */
- .byte = 0x23,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_and
},
/*
* SUB
*/
- {
+ [0x28] = {
/* Eb, Gb */
- .byte = 0x28,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_sub
},
- {
+ [0x29] = {
/* Ev, Gv */
- .byte = 0x29,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_sub
},
- {
+ [0x2A] = {
/* Gb, Eb */
- .byte = 0x2A,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_sub
},
- {
+ [0x2B] = {
/* Gv, Ev */
- .byte = 0x2B,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_sub
},
/*
* XOR
*/
- {
+ [0x30] = {
/* Eb, Gb */
- .byte = 0x30,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_xor
},
- {
+ [0x31] = {
/* Ev, Gv */
- .byte = 0x31,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_xor
},
- {
+ [0x32] = {
/* Gb, Eb */
- .byte = 0x32,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_xor
},
- {
+ [0x33] = {
/* Gv, Ev */
- .byte = 0x33,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_xor
},
/*
* MOV
*/
- {
+ [0x88] = {
/* Eb, Gb */
- .byte = 0x88,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_mov
},
- {
+ [0x89] = {
/* Ev, Gv */
- .byte = 0x89,
+ .valid = true,
.regmodrm = true,
.regtorm = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_mov
},
- {
+ [0x8A] = {
/* Gb, Eb */
- .byte = 0x8A,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_mov
},
- {
+ [0x8B] = {
/* Gv, Ev */
- .byte = 0x8B,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_mov
},
- {
+ [0xA0] = {
/* AL, Ob */
- .byte = 0xA0,
+ .valid = true,
.dmo = true,
.todmo = false,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_mov
},
- {
+ [0xA1] = {
/* rAX, Ov */
- .byte = 0xA1,
+ .valid = true,
.dmo = true,
.todmo = false,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_mov
},
- {
+ [0xA2] = {
/* Ob, AL */
- .byte = 0xA2,
+ .valid = true,
.dmo = true,
.todmo = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_mov
},
- {
+ [0xA3] = {
/* Ov, rAX */
- .byte = 0xA3,
+ .valid = true,
.dmo = true,
.todmo = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_mov
},
/*
* MOVS
*/
- {
+ [0xA4] = {
/* Yb, Xb */
- .byte = 0xA4,
+ .valid = true,
.movs = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_movs
},
- {
+ [0xA5] = {
/* Yv, Xv */
- .byte = 0xA5,
+ .valid = true,
.movs = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_movs
},
/*
* STOS
*/
- {
+ [0xAA] = {
/* Yb, AL */
- .byte = 0xAA,
+ .valid = true,
.stos = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_stos
},
- {
+ [0xAB] = {
/* Yv, rAX */
- .byte = 0xAB,
+ .valid = true,
.stos = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_stos
},
/*
* LODS
*/
- {
+ [0xAC] = {
/* AL, Xb */
- .byte = 0xAC,
+ .valid = true,
.lods = true,
.szoverride = false,
.defsize = OPSIZE_BYTE,
- .allsize = -1,
.emul = &x86_emul_lods
},
- {
+ [0xAD] = {
/* rAX, Xv */
- .byte = 0xAD,
+ .valid = true,
.lods = true,
.szoverride = true,
.defsize = -1,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.emul = &x86_emul_lods
},
};
-static const struct x86_opcode secondary_opcode_table[] = {
+static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
/*
* MOVZX
*/
- {
+ [0xB6] = {
/* Gv, Eb */
- .byte = 0xB6,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = OPSIZE_BYTE,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.flags = FLAG_ze,
.emul = &x86_emul_mov
},
- {
+ [0xB7] = {
/* Gv, Ew */
- .byte = 0xB7,
+ .valid = true,
.regmodrm = true,
.regtorm = false,
.szoverride = true,
.defsize = OPSIZE_WORD,
- .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
.flags = FLAG_ze,
.emul = &x86_emul_mov
},
@@ -1537,7 +1495,7 @@ static const struct x86_opcode secondary
static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
/* [REX-present][enc][opsize] */
-static const struct x86_reg gpr_map__special[2][4][8] = {
+static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
[false] = {
/* No REX prefix. */
[0b00] = {
@@ -1627,7 +1585,7 @@ static const struct x86_reg gpr_map__spe
};
/* [depends][enc][size] */
-static const struct x86_reg gpr_map[2][8][8] = {
+static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
[false] = {
/* Not extended. */
[0b000] = {
@@ -1813,7 +1771,7 @@ fsm_read(struct x86_decode_fsm *fsm, uin
return 0;
}
-static void
+static inline void
fsm_advance(struct x86_decode_fsm *fsm, size_t n,
int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
{
@@ -2188,10 +2146,9 @@ node_regmodrm(struct x86_decode_fsm *fsm
opcode = instr->opcode;
- instr->regmodrm.present = true;
- instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
- instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
+ instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
+ instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
if (opcode->regtorm) {
strg = &instr->src;
@@ -2316,11 +2273,6 @@ get_operand_size(struct x86_decode_fsm *
}
}
- /* See if available */
- if ((opcode->allsize & opsize) == 0) {
- // XXX do we care?
- }
-
return opsize;
}
@@ -2353,21 +2305,15 @@ node_primary_opcode(struct x86_decode_fs
{
const struct x86_opcode *opcode;
uint8_t byte;
- size_t i, n;
if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
return -1;
}
- n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
- for (i = 0; i < n; i++) {
- if (primary_opcode_table[i].byte == byte)
- break;
- }
- if (i == n) {
+ opcode = &primary_opcode_table[byte];
+ if (__predict_false(!opcode->valid)) {
return -1;
}
- opcode = &primary_opcode_table[i];
instr->opcode = opcode;
instr->emul = opcode->emul;
@@ -2400,21 +2346,15 @@ node_secondary_opcode(struct x86_decode_
{
const struct x86_opcode *opcode;
uint8_t byte;
- size_t i, n;
if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
return -1;
}
- n = sizeof(secondary_opcode_table) / sizeof(secondary_opcode_table[0]);
- for (i = 0; i < n; i++) {
- if (secondary_opcode_table[i].byte == byte)
- break;
- }
- if (i == n) {
+ opcode = &secondary_opcode_table[byte];
+ if (__predict_false(!opcode->valid)) {
return -1;
}
- opcode = &secondary_opcode_table[i];
instr->opcode = opcode;
instr->emul = opcode->emul;
@@ -2495,11 +2435,11 @@ node_rex_prefix(struct x86_decode_fsm *f
if (__predict_false(!fsm->is64bit)) {
return -1;
}
- rexpref->present = true;
- rexpref->w = ((byte & 0x8) != 0);
- rexpref->r = ((byte & 0x4) != 0);
- rexpref->x = ((byte & 0x2) != 0);
rexpref->b = ((byte & 0x1) != 0);
+ rexpref->x = ((byte & 0x2) != 0);
+ rexpref->r = ((byte & 0x4) != 0);
+ rexpref->w = ((byte & 0x8) != 0);
+ rexpref->present = true;
n = 1;
}