Module Name:    src
Committed By:   maxv
Date:           Thu Mar  7 15:47:34 UTC 2019

Modified Files:
        src/lib/libnvmm: libnvmm_x86.c

Log Message:
Micro optimizations:

 - Compress x86_rexpref, x86_regmodrm, x86_opcode and x86_instr, using
   bitfields and narrower integer types.
 - Cache-align the register, opcode and group tables.
 - Modify the opcode tables to have 256 entries indexed directly by the
   opcode byte, and avoid a linear lookup.


To generate a diff of this commit:
cvs rdiff -u -r1.26 -r1.27 src/lib/libnvmm/libnvmm_x86.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libnvmm/libnvmm_x86.c
diff -u src/lib/libnvmm/libnvmm_x86.c:1.26 src/lib/libnvmm/libnvmm_x86.c:1.27
--- src/lib/libnvmm/libnvmm_x86.c:1.26	Tue Feb 26 12:23:12 2019
+++ src/lib/libnvmm/libnvmm_x86.c	Thu Mar  7 15:47:34 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: libnvmm_x86.c,v 1.26 2019/02/26 12:23:12 maxv Exp $	*/
+/*	$NetBSD: libnvmm_x86.c,v 1.27 2019/03/07 15:47:34 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -46,6 +46,7 @@
 #include "nvmm.h"
 
 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define __cacheline_aligned __attribute__((__aligned__(64)))
 
 #include <x86/specialreg.h>
 
@@ -904,15 +905,15 @@ struct x86_legpref {
 	bool adr_ovr:1;
 	bool rep:1;
 	bool repn:1;
-	int seg;
+	int8_t seg;
 };
 
 struct x86_rexpref {
-	bool present;
-	bool w;
-	bool r;
-	bool x;
-	bool b;
+	bool b:1;
+	bool x:1;
+	bool r:1;
+	bool w:1;
+	bool present:1;
 };
 
 struct x86_reg {
@@ -962,10 +963,9 @@ enum REGMODRM__Rm {
 };
 
 struct x86_regmodrm {
-	bool present;
-	enum REGMODRM__Mod mod;
-	enum REGMODRM__Reg reg;
-	enum REGMODRM__Rm rm;
+	uint8_t mod:2;
+	uint8_t reg:3;
+	uint8_t rm:3;
 };
 
 struct x86_immediate {
@@ -999,22 +999,20 @@ struct x86_store {
 };
 
 struct x86_instr {
-	size_t len;
+	uint8_t len;
 	struct x86_legpref legpref;
 	struct x86_rexpref rexpref;
-	size_t operand_size;
-	size_t address_size;
-	uint64_t zeroextend_mask;
-
 	struct x86_regmodrm regmodrm;
+	uint8_t operand_size;
+	uint8_t address_size;
+	uint64_t zeroextend_mask;
 
 	const struct x86_opcode *opcode;
+	const struct x86_emul *emul;
 
 	struct x86_store src;
 	struct x86_store dst;
 	struct x86_store *strm;
-
-	const struct x86_emul *emul;
 };
 
 struct x86_decode_fsm {
@@ -1030,22 +1028,21 @@ struct x86_decode_fsm {
 };
 
 struct x86_opcode {
-	uint8_t byte;
-	bool regmodrm;
-	bool regtorm;
-	bool dmo;
-	bool todmo;
-	bool movs;
-	bool stos;
-	bool lods;
-	bool szoverride;
-	int defsize;
-	int allsize;
-	bool group1;
-	bool group3;
-	bool group11;
-	bool immediate;
-	int flags;
+	bool valid:1;
+	bool regmodrm:1;
+	bool regtorm:1;
+	bool dmo:1;
+	bool todmo:1;
+	bool movs:1;
+	bool stos:1;
+	bool lods:1;
+	bool szoverride:1;
+	bool group1:1;
+	bool group3:1;
+	bool group11:1;
+	bool immediate:1;
+	uint8_t defsize;
+	uint8_t flags;
 	const struct x86_emul *emul;
 };
 
@@ -1062,59 +1059,56 @@ struct x86_group_entry {
 #define FLAG_immz	0x02
 #define FLAG_ze		0x04
 
-static const struct x86_group_entry group1[8] = {
+static const struct x86_group_entry group1[8] __cacheline_aligned = {
 	[1] = { .emul = &x86_emul_or },
 	[4] = { .emul = &x86_emul_and },
 	[6] = { .emul = &x86_emul_xor },
 	[7] = { .emul = &x86_emul_cmp }
 };
 
-static const struct x86_group_entry group3[8] = {
+static const struct x86_group_entry group3[8] __cacheline_aligned = {
 	[0] = { .emul = &x86_emul_test },
 	[1] = { .emul = &x86_emul_test }
 };
 
-static const struct x86_group_entry group11[8] = {
+static const struct x86_group_entry group11[8] __cacheline_aligned = {
 	[0] = { .emul = &x86_emul_mov }
 };
 
-static const struct x86_opcode primary_opcode_table[] = {
+static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
 	/*
 	 * Group1
 	 */
-	{
+	[0x80] = {
 		/* Eb, Ib */
-		.byte = 0x80,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.group1 = true,
 		.immediate = true,
 		.emul = NULL /* group1 */
 	},
-	{
+	[0x81] = {
 		/* Ev, Iz */
-		.byte = 0x81,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.group1 = true,
 		.immediate = true,
 		.flags = FLAG_immz,
 		.emul = NULL /* group1 */
 	},
-	{
+	[0x83] = {
 		/* Ev, Ib */
-		.byte = 0x83,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.group1 = true,
 		.immediate = true,
 		.flags = FLAG_imm8,
@@ -1124,26 +1118,24 @@ static const struct x86_opcode primary_o
 	/*
 	 * Group3
 	 */
-	{
+	[0xF6] = {
 		/* Eb, Ib */
-		.byte = 0xF6,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.group3 = true,
 		.immediate = true,
 		.emul = NULL /* group3 */
 	},
-	{
+	[0xF7] = {
 		/* Ev, Iz */
-		.byte = 0xF7,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.group3 = true,
 		.immediate = true,
 		.flags = FLAG_immz,
@@ -1153,26 +1145,24 @@ static const struct x86_opcode primary_o
 	/*
 	 * Group11
 	 */
-	{
+	[0xC6] = {
 		/* Eb, Ib */
-		.byte = 0xC6,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.group11 = true,
 		.immediate = true,
 		.emul = NULL /* group11 */
 	},
-	{
+	[0xC7] = {
 		/* Ev, Iz */
-		.byte = 0xC7,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.group11 = true,
 		.immediate = true,
 		.flags = FLAG_immz,
@@ -1182,353 +1172,321 @@ static const struct x86_opcode primary_o
 	/*
 	 * OR
 	 */
-	{
+	[0x08] = {
 		/* Eb, Gb */
-		.byte = 0x08,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_or
 	},
-	{
+	[0x09] = {
 		/* Ev, Gv */
-		.byte = 0x09,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_or
 	},
-	{
+	[0x0A] = {
 		/* Gb, Eb */
-		.byte = 0x0A,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_or
 	},
-	{
+	[0x0B] = {
 		/* Gv, Ev */
-		.byte = 0x0B,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_or
 	},
 
 	/*
 	 * AND
 	 */
-	{
+	[0x20] = {
 		/* Eb, Gb */
-		.byte = 0x20,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_and
 	},
-	{
+	[0x21] = {
 		/* Ev, Gv */
-		.byte = 0x21,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_and
 	},
-	{
+	[0x22] = {
 		/* Gb, Eb */
-		.byte = 0x22,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_and
 	},
-	{
+	[0x23] = {
 		/* Gv, Ev */
-		.byte = 0x23,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_and
 	},
 
 	/*
 	 * SUB
 	 */
-	{
+	[0x28] = {
 		/* Eb, Gb */
-		.byte = 0x28,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_sub
 	},
-	{
+	[0x29] = {
 		/* Ev, Gv */
-		.byte = 0x29,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_sub
 	},
-	{
+	[0x2A] = {
 		/* Gb, Eb */
-		.byte = 0x2A,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_sub
 	},
-	{
+	[0x2B] = {
 		/* Gv, Ev */
-		.byte = 0x2B,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_sub
 	},
 
 	/*
 	 * XOR
 	 */
-	{
+	[0x30] = {
 		/* Eb, Gb */
-		.byte = 0x30,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_xor
 	},
-	{
+	[0x31] = {
 		/* Ev, Gv */
-		.byte = 0x31,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_xor
 	},
-	{
+	[0x32] = {
 		/* Gb, Eb */
-		.byte = 0x32,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_xor
 	},
-	{
+	[0x33] = {
 		/* Gv, Ev */
-		.byte = 0x33,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_xor
 	},
 
 	/*
 	 * MOV
 	 */
-	{
+	[0x88] = {
 		/* Eb, Gb */
-		.byte = 0x88,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_mov
 	},
-	{
+	[0x89] = {
 		/* Ev, Gv */
-		.byte = 0x89,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_mov
 	},
-	{
+	[0x8A] = {
 		/* Gb, Eb */
-		.byte = 0x8A,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_mov
 	},
-	{
+	[0x8B] = {
 		/* Gv, Ev */
-		.byte = 0x8B,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_mov
 	},
-	{
+	[0xA0] = {
 		/* AL, Ob */
-		.byte = 0xA0,
+		.valid = true,
 		.dmo = true,
 		.todmo = false,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_mov
 	},
-	{
+	[0xA1] = {
 		/* rAX, Ov */
-		.byte = 0xA1,
+		.valid = true,
 		.dmo = true,
 		.todmo = false,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_mov
 	},
-	{
+	[0xA2] = {
 		/* Ob, AL */
-		.byte = 0xA2,
+		.valid = true,
 		.dmo = true,
 		.todmo = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_mov
 	},
-	{
+	[0xA3] = {
 		/* Ov, rAX */
-		.byte = 0xA3,
+		.valid = true,
 		.dmo = true,
 		.todmo = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_mov
 	},
 
 	/*
 	 * MOVS
 	 */
-	{
+	[0xA4] = {
 		/* Yb, Xb */
-		.byte = 0xA4,
+		.valid = true,
 		.movs = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_movs
 	},
-	{
+	[0xA5] = {
 		/* Yv, Xv */
-		.byte = 0xA5,
+		.valid = true,
 		.movs = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_movs
 	},
 
 	/*
 	 * STOS
 	 */
-	{
+	[0xAA] = {
 		/* Yb, AL */
-		.byte = 0xAA,
+		.valid = true,
 		.stos = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_stos
 	},
-	{
+	[0xAB] = {
 		/* Yv, rAX */
-		.byte = 0xAB,
+		.valid = true,
 		.stos = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_stos
 	},
 
 	/*
 	 * LODS
 	 */
-	{
+	[0xAC] = {
 		/* AL, Xb */
-		.byte = 0xAC,
+		.valid = true,
 		.lods = true,
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
-		.allsize = -1,
 		.emul = &x86_emul_lods
 	},
-	{
+	[0xAD] = {
 		/* rAX, Xv */
-		.byte = 0xAD,
+		.valid = true,
 		.lods = true,
 		.szoverride = true,
 		.defsize = -1,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.emul = &x86_emul_lods
 	},
 };
 
-static const struct x86_opcode secondary_opcode_table[] = {
+static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
 	/*
 	 * MOVZX
 	 */
-	{
+	[0xB6] = {
 		/* Gv, Eb */
-		.byte = 0xB6,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = true,
 		.defsize = OPSIZE_BYTE,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.flags = FLAG_ze,
 		.emul = &x86_emul_mov
 	},
-	{
+	[0xB7] = {
 		/* Gv, Ew */
-		.byte = 0xB7,
+		.valid = true,
 		.regmodrm = true,
 		.regtorm = false,
 		.szoverride = true,
 		.defsize = OPSIZE_WORD,
-		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.flags = FLAG_ze,
 		.emul = &x86_emul_mov
 	},
@@ -1537,7 +1495,7 @@ static const struct x86_opcode secondary
 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
 
 /* [REX-present][enc][opsize] */
-static const struct x86_reg gpr_map__special[2][4][8] = {
+static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
 	[false] = {
 		/* No REX prefix. */
 		[0b00] = {
@@ -1627,7 +1585,7 @@ static const struct x86_reg gpr_map__spe
 };
 
 /* [depends][enc][size] */
-static const struct x86_reg gpr_map[2][8][8] = {
+static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
 	[false] = {
 		/* Not extended. */
 		[0b000] = {
@@ -1813,7 +1771,7 @@ fsm_read(struct x86_decode_fsm *fsm, uin
 	return 0;
 }
 
-static void
+static inline void
 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
     int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
 {
@@ -2188,10 +2146,9 @@ node_regmodrm(struct x86_decode_fsm *fsm
 
 	opcode = instr->opcode;
 
-	instr->regmodrm.present = true;
-	instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
-	instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
 	instr->regmodrm.rm  = ((byte & 0b00000111) >> 0);
+	instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
+	instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
 
 	if (opcode->regtorm) {
 		strg = &instr->src;
@@ -2316,11 +2273,6 @@ get_operand_size(struct x86_decode_fsm *
 		}
 	}
 
-	/* See if available */
-	if ((opcode->allsize & opsize) == 0) {
-		// XXX do we care?
-	}
-
 	return opsize;
 }
 
@@ -2353,21 +2305,15 @@ node_primary_opcode(struct x86_decode_fs
 {
 	const struct x86_opcode *opcode;
 	uint8_t byte;
-	size_t i, n;
 
 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
 		return -1;
 	}
 
-	n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
-	for (i = 0; i < n; i++) {
-		if (primary_opcode_table[i].byte == byte)
-			break;
-	}
-	if (i == n) {
+	opcode = &primary_opcode_table[byte];
+	if (__predict_false(!opcode->valid)) {
 		return -1;
 	}
-	opcode = &primary_opcode_table[i];
 
 	instr->opcode = opcode;
 	instr->emul = opcode->emul;
@@ -2400,21 +2346,15 @@ node_secondary_opcode(struct x86_decode_
 {
 	const struct x86_opcode *opcode;
 	uint8_t byte;
-	size_t i, n;
 
 	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
 		return -1;
 	}
 
-	n = sizeof(secondary_opcode_table) / sizeof(secondary_opcode_table[0]);
-	for (i = 0; i < n; i++) {
-		if (secondary_opcode_table[i].byte == byte)
-			break;
-	}
-	if (i == n) {
+	opcode = &secondary_opcode_table[byte];
+	if (__predict_false(!opcode->valid)) {
 		return -1;
 	}
-	opcode = &secondary_opcode_table[i];
 
 	instr->opcode = opcode;
 	instr->emul = opcode->emul;
@@ -2495,11 +2435,11 @@ node_rex_prefix(struct x86_decode_fsm *f
 		if (__predict_false(!fsm->is64bit)) {
 			return -1;
 		}
-		rexpref->present = true;
-		rexpref->w = ((byte & 0x8) != 0);
-		rexpref->r = ((byte & 0x4) != 0);
-		rexpref->x = ((byte & 0x2) != 0);
 		rexpref->b = ((byte & 0x1) != 0);
+		rexpref->x = ((byte & 0x2) != 0);
+		rexpref->r = ((byte & 0x4) != 0);
+		rexpref->w = ((byte & 0x8) != 0);
+		rexpref->present = true;
 		n = 1;
 	}
 

Reply via email to