On Sat, Nov 24, 2012 at 09:39:35AM -0800, Richard Henderson wrote: > In addition to better compile-time detection, perform runtime detection. > > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > tcg/i386/tcg-target.c | 34 +++++++++++++++++++++++++++++++++- > tcg/i386/tcg-target.h | 5 ----- > 2 files changed, 33 insertions(+), 6 deletions(-) > > Y'all are right that there's no particularly good method with which > to detect i686 *or later*, and thus cmov support, in gcc. If one > uses -march=native with any processor made in the last 5 years, > one will have at least SSE1 support. So we can reasonably use that > as a clue. > > To fill in the holes, we can do the check at runtime. That does > involve a tiny amount of runtime overhead, testing a global variable. > I suspect that this overhead is unmeasurable.
If this overhead is unmeasurable, and I think it is true, I think it would be better to just always use that on i386 (but not on x86_64) instead of having a complex compile time detection that could fail. Otherwise the patch looks fine. > diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c > index 6f3ad3c..b333b46 100644 > --- a/tcg/i386/tcg-target.c > +++ b/tcg/i386/tcg-target.c > @@ -97,6 +97,20 @@ static const int tcg_target_call_oarg_regs[] = { > # define TCG_REG_L1 TCG_REG_EDX > #endif > > +/* Attempt to determine at compile-time whether the compiler assumes that > + cmov is available. We get 64-bit for free. P6 (i686) and later include > + support for cmov, but there is no one preprocessor define that determines > + this. Assume that all processors that include sse also support cmov, so > + that we sorta future-proof this test against new preprocessor defines. */ > +#include <cpuid.h> > +#if (TCG_TARGET_REG_BITS == 64 \ > + || defined(__i686__) || defined(__pentium4__) \ > + || defined(__athlon__) || defined(__SSE__)) > +# define have_cmov 1 > +#else > +static bool have_cmov; > +#endif > + > static uint8_t *tb_ret_addr; > > static void patch_reloc(uint8_t *code_ptr, int type, > @@ -943,7 +957,14 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond > cond, TCGArg dest, > TCGArg v1) > { > tcg_out_cmp(s, c1, c2, const_c2, 0); > - tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); > + if (have_cmov) { > + tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); > + } else { > + int over = gen_new_label(); > + tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); > + tcg_out_mov(s, TCG_TYPE_I32, dest, v1); > + tcg_out_label(s, over, s->code_ptr); > + } > } > > #if TCG_TARGET_REG_BITS == 64 > @@ -2243,6 +2264,17 @@ static void tcg_target_qemu_prologue(TCGContext *s) > > static void tcg_target_init(TCGContext *s) > { > + /* If we could not determine cmov availability at compile time, perform > + the check at runtime. 
99% certainty that we're running on hardware > + that supports cmov, but we still need to check. In case cmov is not > + available, we'll use a small forward branch. */ > +#ifndef have_cmov > + { > + unsigned a, b, c, d; > + have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV)); > + } > +#endif > + > #if !defined(CONFIG_USER_ONLY) > /* fail safe */ > if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) > diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h > index dbc6756..450078b 100644 > --- a/tcg/i386/tcg-target.h > +++ b/tcg/i386/tcg-target.h > @@ -90,12 +90,7 @@ typedef enum { > #define TCG_TARGET_HAS_nand_i32 0 > #define TCG_TARGET_HAS_nor_i32 0 > #define TCG_TARGET_HAS_deposit_i32 1 > -#if defined(__x86_64__) || defined(__i686__) > -/* Use cmov only if the compiler is already doing so. */ > #define TCG_TARGET_HAS_movcond_i32 1 > -#else > -#define TCG_TARGET_HAS_movcond_i32 0 > -#endif > > #if TCG_TARGET_REG_BITS == 64 > #define TCG_TARGET_HAS_div2_i64 1 > -- > 1.7.11.7 > > -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurel...@aurel32.net http://www.aurel32.net