On Thu, Sep 19, 2013 at 12:58:37PM -0700, Richard Henderson wrote: > We previously allocated 32-bits per temp for the next_free_temp entry. > We now allocate 4 bits per temp across the 4 bitmaps. > > Using a linked list meant that if a translator is tweeked, resulting in > temps being freed in a different order, that would have follow-on effects > throughout the TB. Always allocating the lowest free temp means that > follow-on effects are minimized, which can make it easier to diff output > when debugging the translators. > > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > tcg/tcg.c | 32 +++++++++++++++----------------- > tcg/tcg.h | 11 ++++++----- > 2 files changed, 21 insertions(+), 22 deletions(-) > > diff --git a/tcg/tcg.c b/tcg/tcg.c > index fd7fb6b..41ed8b0 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -316,11 +316,12 @@ void tcg_set_frame(TCGContext *s, int reg, intptr_t > start, intptr_t size) > > void tcg_func_start(TCGContext *s) > { > - int i; > tcg_pool_reset(s); > s->nb_temps = s->nb_globals; > - for(i = 0; i < (TCG_TYPE_COUNT * 2); i++) > - s->first_free_temp[i] = -1; > + > + /* No temps have been previously allocated for size or locality. */ > + memset(s->free_temps, 0, sizeof(s->free_temps)); > + > s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS); > s->nb_labels = 0; > s->current_frame_offset = s->frame_start; > @@ -468,16 +469,15 @@ static inline int tcg_temp_new_internal(TCGType type, > int temp_local) > TCGTemp *ts; > int idx, k; > > - k = type; > - if (temp_local) > - k += TCG_TYPE_COUNT; > - idx = s->first_free_temp[k]; > - if (idx != -1) { > - /* There is already an available temp with the > - right type */ > + k = type + (temp_local ? TCG_TYPE_COUNT : 0); > + idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); > + if (idx < TCG_MAX_TEMPS) { > + /* There is already an available temp with the right type. */ > + clear_bit(idx, s->free_temps[k].l); > + > ts = &s->temps[idx]; > - s->first_free_temp[k] = ts->next_free_temp; > ts->temp_allocated = 1; > + assert(ts->base_type == type); > assert(ts->temp_local == temp_local); > } else { > idx = s->nb_temps; > @@ -533,7 +533,7 @@ TCGv_i64 tcg_temp_new_internal_i64(int temp_local) > return MAKE_TCGV_I64(idx); > } > > -static inline void tcg_temp_free_internal(int idx) > +static void tcg_temp_free_internal(int idx) > { > TCGContext *s = &tcg_ctx; > TCGTemp *ts; > @@ -550,11 +550,9 @@ static inline void tcg_temp_free_internal(int idx) > ts = &s->temps[idx]; > assert(ts->temp_allocated != 0); > ts->temp_allocated = 0; > - k = ts->base_type; > - if (ts->temp_local) > - k += TCG_TYPE_COUNT; > - ts->next_free_temp = s->first_free_temp[k]; > - s->first_free_temp[k] = idx; > + > + k = ts->type + (ts->temp_local ? TCG_TYPE_COUNT : 0); > + set_bit(idx, s->free_temps[k].l); > } > > void tcg_temp_free_i32(TCGv_i32 arg) > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 902c751..c45e41b 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -26,7 +26,7 @@ > #define TCG_H > > #include "qemu-common.h" > - > +#include "qemu/bitops.h" > #include "tcg-target.h" > > /* Default target word size to pointer size. */ > @@ -400,8 +400,6 @@ typedef struct TCGTemp { > basic blocks. Otherwise, it is not > preserved across basic blocks. */ > unsigned int temp_allocated:1; /* never used for code gen */ > - /* index of next free temp of same base type, -1 if end */ > - int next_free_temp; > const char *name; > } TCGTemp; > > @@ -412,6 +410,10 @@ typedef struct TCGHelperInfo { > > typedef struct TCGContext TCGContext; > > +typedef struct TCGTempSet { > + unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; > +} TCGTempSet; > + > struct TCGContext { > uint8_t *pool_cur, *pool_end; > TCGPool *pool_first, *pool_current, *pool_first_large; > @@ -419,8 +421,6 @@ struct TCGContext { > int nb_labels; > int nb_globals; > int nb_temps; > - /* index of free temps, -1 if none */ > - int first_free_temp[TCG_TYPE_COUNT * 2]; > > /* goto_tb support */ > uint8_t *code_buf; > @@ -446,6 +446,7 @@ struct TCGContext { > > uint8_t *code_ptr; > TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ > + TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; > > TCGHelperInfo *helpers; > int nb_helpers;
It looks fine to me, though I don't know if it has any impact on the performances. find_first_bit() is calling find_next_bit() which is 87 instructions longs on x86_64. Maybe implementing find_first_bit() directly would help a bit there. That said: Reviewed-by: Aurelien Jarno <aurel...@aurel32.net> -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurel...@aurel32.net http://www.aurel32.net