As promised on yesterday's SystemTap call, here's inat.c. It consists of a couple of tables that capture more information about the x86 instruction sets. For example, you could use this code to determine whether an opcode/instruction is invalid, privileged, a floating-point op, or in some other way of possible interest to uprobes and/or kprobes.
Also included is cmp.c, a user program that provides an example of the tables' use and also serves as a check against the tables currently in use by x86 ubp/uprobes. (There are a few differences, which reflect either corrections in inat.c or differences in how the tables are used.) The intention is eventually to provide this as an enhancement to our x86 instruction-analysis code. inat.c uses the x86 kvm approach of one bitmap for each opcode. Comments welcome. Jim
/* * Report discrepancies between ubp/uprobes's opinion of which instructions * are probeable and what we'd conclude from the instruction-attribute tables. */ #include <stdlib.h> #include <stdio.h> typedef unsigned int u32; typedef unsigned short u16; #include "inat.c" /* The following tables are from ubp_x86.c. */ #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ << (row % 32)) #define BITS_PER_LONG (8*sizeof(long)) /* from arch/x86/include/asm/bitops.h */ static inline int test_bit(int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; } static const u32 good_insns_64[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ------------------------------- */ W(0x00, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0)| /* 00 */ W(0x10, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0), /* 10 */ W(0x20, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0)| /* 20 */ W(0x30, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,0), /* 30 */ W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 40 */ W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */ W(0x60, 0,0,0,1,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ W(0x80, 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,0,0,1,1,1,1,1,1,0,0,0,0)| /* c0 */ W(0xd0, 1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1), /* d0 */ W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */ W(0xf0, 0,0,1,1,0,1,1,1,1,1,0,0,1,1,1,1) /* f0 */ /* ------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; /* Good-instruction tables for 32-bit apps -- 
copied from i386 uprobes */ static const u32 good_insns_32[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ------------------------------- */ W(0x00, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0)| /* 00 */ W(0x10, 1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0), /* 10 */ W(0x20, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1)| /* 20 */ W(0x30, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,1), /* 30 */ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */ W(0x60, 1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,0)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 70 */ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ W(0xa0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0)| /* c0 */ W(0xd0, 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1), /* d0 */ W(0xe0, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)| /* e0 */ W(0xf0, 0,0,1,1,0,1,1,1,1,1,0,0,1,1,1,1) /* f0 */ /* ------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; /* Using this for both 64-bit and 32-bit apps */ static const u32 good_2byte_insns[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ------------------------------- */ W(0x00, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1)| /* 00 */ W(0x10, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* 10 */ W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)| /* 20 */ W(0x30, 0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 50 */ W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 60 */ W(0x70, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */ W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 80 */ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ W(0xa0, 1,1,1,1,1,1,0,0,1,1,1,1,1,1,0,1)| /* a0 */ W(0xb0, 1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1), /* b0 */ W(0xc0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* c0 */ W(0xd0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* d0 */ W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* e0 */ W(0xf0, 
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* f0 */ /* ------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; void report(const char *prefix, unsigned op, int ok32, int ok64, int ok32_ia, int ok64_ia) { static const char *okcode[2] = { "bad", "ok" }; if (ok32 != ok32_ia) printf("%s%2.2x 32: ubp %s, inat %s\n", prefix, op, okcode[ok32], okcode[ok32_ia]); if (ok64 != ok64_ia) printf("%s%2.2x 64: ubp %s, inat %s\n", prefix, op, okcode[ok64], okcode[ok64_ia]); } #define BAD_INSN_TYPES (INAT_PRIV | INAT_PFX | INAT_IO | INAT_INT | INAT_UDUB) #define BAD_INSN_TYPES_32 (INAT_INV32 | INAT_PRIV32 | BAD_INSN_TYPES) #define BAD_INSN_TYPES_64 (INAT_INV64 | INAT_REX | BAD_INSN_TYPES) main() { unsigned op; int ok32, ok64; // per ubp/uprobes int ok32_ia, ok64_ia; // per instruction-attribute tables for (op = 0; op <= 0xff; op++) { ok32 = test_bit(op, (unsigned long*) good_insns_32); ok64 = test_bit(op, (unsigned long*) good_insns_64); ok32_ia = !(special_1byte_insns[op] & BAD_INSN_TYPES_32); ok64_ia = !(special_1byte_insns[op] & BAD_INSN_TYPES_64); report("", op, ok32, ok64, ok32_ia, ok64_ia); } for (op = 0; op <= 0xff; op++) { ok32 = ok64 = test_bit(op, (unsigned long*) good_2byte_insns); ok32_ia = !(special_2byte_insns[op] & BAD_INSN_TYPES_32); ok64_ia = !(special_2byte_insns[op] & BAD_INSN_TYPES_64); report("0f ", op, ok32, ok64, ok32_ia, ok64_ia); } exit(0); }
/* * Special instruction attributes -- based on Tables A-1 through A-4, A-6, * and A-7 in the AMD64 Architecture Programmer's Manual, Volume 3. */ #define INAT_INV32 0x1 /* invalid in 32-bit mode */ #define INAT_INV64 0x2 /* invalid in 64-bit mode */ #define INAT_INV (INAT_INV32|INAT_INV64) #define INAT_PRIV 0x4 /* privileged */ /* * Note: * - opcode1 should never be a legacy prefix, since insn_get_prefixes() * eats those. * - Similarly, opcode1 should never be a REX prefix in 64-bit mode. */ #define INAT_PFX 0x8 /* legacy prefix */ #define INAT_REX 0x10 /* REX prefix */ #define INAT_IO 0x20 /* in, out */ #define INAT_INT 0x40 /* int3, int, into, int1 */ #define INAT_UDUB 0x80 /* otherwise dubious in user probes */ #define INAT_FP 0x100 /* floating point */ #define INAT_PRIV32 0x200 /* privileged in 32-bit mode*/ /* * TODO: Increase our precision in detecting invalid (INAT_INV) and * privileged (INAT_PRIV) instructions. We miss some invalid instructions, * and count a few unprivileged system-management instructions as * privileged. Details below. * * For each of the following groups, at least one combination of reg and/or * prefix values yields an invalid instruction. We currently mark that * opcode as invalid (INAT_INV) only if the group contains no valid * instructions. * 1a (8f) * 4 (fe) * 5 (ff) * 6 (0f 00) -- see below * 7 (0f 01) -- see below * 8 (ba) * 9 (c7) * 10 (b9) -- all invalid * 11 (c6, c7) * 12 (0f 71) * 13 (0f 72) * 14 (0f 73) * 15 (0f ae) * 17 (0f 78) * P (0f 0d) * * Similarly, for many 2-byte opcodes, the type of prefix (none, f3, 66, or f2) * helps determine the instruction type. For such opcodes, there are often * invalid op-code/prefix combinations. If there's any valid combination, * we consider the opcode valid. 
*/ static u16 special_1byte_insns[0x100] = { [0x06] = INAT_INV64, [0x07] = INAT_INV64|INAT_UDUB, /* pop es */ [0x0e] = INAT_INV64, [0x16] = INAT_INV64, [0x17] = INAT_INV64|INAT_UDUB, /* pop ss */ [0x1e] = INAT_INV64, [0x1f] = INAT_INV64|INAT_UDUB, /* pop ds */ [0x26] = INAT_PFX, [0x27] = INAT_INV64, [0x2e] = INAT_PFX, [0x2f] = INAT_INV64, [0x36] = INAT_PFX, [0x37] = INAT_INV64, [0x3e] = INAT_PFX, [0x3f] = INAT_INV64, [0x40 ... 0x4f] = INAT_REX, [0x60 ... 0x62] = INAT_INV64, [0x63] = INAT_PRIV32, /* 32 = arpl, 64 = movsxd */ [0x64 ... 0x67] = INAT_PFX, [0x6c ... 0x6f] = INAT_IO, [0x82] = INAT_INV64, [0x9b] = INAT_FP, /* fwait */ [0xc4] = INAT_INV64, [0xc5] = INAT_INV64, [0xcc] = INAT_INT, [0xcd] = INAT_INT, [0xce] = INAT_INV64|INAT_INT, [0xcf] = INAT_UDUB, /* iret */ [0xd4] = INAT_INV64, [0xd5] = INAT_INV64, [0xd6] = INAT_INV, /* salc: undocumented/illegal */ [0xd8 ... 0xdf] = INAT_FP, [0xe4 ... 0xe7] = INAT_IO, [0xec ... 0xef] = INAT_IO, [0xf0] = INAT_PFX, [0xf1] = INAT_INT, [0xf4] = INAT_PRIV, /* hlt */ [0xfa] = INAT_PRIV, /* cli */ [0xfb] = INAT_PRIV /* sti */ }; static u16 special_2byte_insns[0x100] = { /* * TODO: Not all Group 6 instructions are privileged (P): * reg insn attribute * 0 sldt NP * 1 str NP * 2 lldt P * 3 ltr P * 4 verr NP * 5 verw NP * 6 - INV * 7 - INV */ [0x00] = INAT_PRIV, /* Group 6 - sys mgmt */ /* * TODO: Not all Group 7 instructions are privileged (P): * modrm value insn attribute * reg = 0 sgdt NP * reg = 1: * c8 monitor NP? * c9 mwait P * others - INV * reg = 2 lgdt P * reg = 3: * d8 vmrun P * d9 vmmcall NP * da vmload P * db vmsave P * dc stgi P * de skinit P * df invlpga P * reg = 4 smsw NP * reg = 5 - INV * reg = 6 lmsw P * reg = 7: * f8 swapgs P * f9 rdtscp NP? 
* others - INV */ [0x01] = INAT_PRIV, /* Group 7 - sys mgmt */ [0x02] = INAT_UDUB, /* lar */ [0x03] = INAT_UDUB, /* lsl */ [0x04] = INAT_INV, [0x05] = INAT_UDUB, /* syscall */ [0x06] = INAT_PRIV, /* clts */ [0x07] = INAT_PRIV, /* sysret */ [0x08] = INAT_PRIV, /* invd */ [0x09] = INAT_PRIV, /* wbinvd */ [0x0a] = INAT_INV, [0x0b] = INAT_INV, /* ud2 */ [0x0c] = INAT_INV, // [0x0d] = INAT_UDUB, /* prefetch */ [0x0f] = INAT_FP, /* 3-byte opcodes - 3DNow! */ [0x10 ... 0x17] = INAT_FP, /* TODO: Group 16 is prefetches (reg = 0-3) and nops (reg = 4-7). */ // [0x18] = INAT_UDUB, /* Group 16 - prefetch, nop */ [0x20] = INAT_PRIV, /* mov from CR */ [0x21] = INAT_PRIV, /* mov from DR */ [0x22] = INAT_PRIV, /* mov to CR */ [0x23] = INAT_PRIV, /* mov to DR */ [0x24 ... 0x27] = INAT_INV, [0x28 ... 0x2f] = INAT_FP, [0x30] = INAT_PRIV, /* wrmsr */ [0x32] = INAT_PRIV, /* rdmsr */ [0x34] = INAT_UDUB, /* sysenter */ [0x35] = INAT_PRIV, /* sysexit */ [0x36 ... 0x3f] = INAT_INV, [0x50 ... 0x5f] = INAT_FP, [0x60 ... 0x6f] = INAT_FP, [0x70 ... 0x79] = INAT_FP, [0x7a ... 0x7b] = INAT_INV, [0x7c ... 0x7f] = INAT_FP, [0xa6] = INAT_INV, [0xa7] = INAT_INV, /* TODO: In Group 15, mfence, lfence, and sfence aren't FP ops. */ [0xae] = INAT_FP, /* Group 15 */ [0xb8] = INAT_INV, [0xb9] = INAT_INV, [0xc2] = INAT_FP, [0xc4] = INAT_FP, [0xc5] = INAT_FP, [0xc6] = INAT_FP, [0xd0] = INAT_INV, [0xd1 ... 0xdf] = INAT_FP, [0xe0 ... 0xef] = INAT_FP, [0xf0] = INAT_INV, [0xf1 ... 0xfe] = INAT_FP, [0xff] = INAT_INV };