Ping! Could someone who can approve please review the patch?

Thanks,
-Andi

Nuno Diegues <n...@ist.utl.pt> writes:

> Hello,
>
> I have taken the chance to improve the patch by addressing the
> comments above in this thread.  Namely:
>  - use a simple random-number generator managed inside the library only
>  - remove floating-point usage and replace it with fixed-point arithmetic
>  - add some comments where relevant
>
> Re-running the STAMP benchmarks shows gains similar to those shown
> above, relative to an unmodified GCC 5.0.0:
>
>   benchmark:      speedup
>   genome:         1.58
>   intruder:       1.78
>   labyrinth:      1.00
>   ssca2:          1.01
>   yada:           1.00
>   kmeans-high:    1.16
>   kmeans-low:     1.16
>   vacation-high:  2.05
>   vacation-low:   2.81
>
> I appreciate any feedback and comments!
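>
> Since the fixed-point helpers may be easier to review in isolation,
> here is a minimal standalone sketch (illustrative only, not part of
> the patch) that mirrors the Q32.32 helpers introduced in libitm_i.h
> below:
>
>   #include <stdint.h>
>   #include <stdio.h>
>
>   typedef uint64_t fixed_pt_t;      // Q32.32: high 32 bits hold the integer part.
>   typedef __uint128_t fixed_pt_td;  // Double-width type for intermediates.
>
>   #define FRAC_WIDTH 32
>   #define ONE ((fixed_pt_t)1 << FRAC_WIDTH)
>
>   #define fixed_mul(A,B) \
>     ((fixed_pt_t)(((fixed_pt_td)(A) * (fixed_pt_td)(B)) >> FRAC_WIDTH))
>   #define fixed_div(A,B) \
>     ((fixed_pt_t)(((fixed_pt_td)(A) << FRAC_WIDTH) / (fixed_pt_td)(B)))
>   #define fixed_const(R) \
>     ((fixed_pt_t)((R) * ONE + ((R) >= 0 ? 0.5 : -0.5)))
>
>   int main(void)
>   {
>     fixed_pt_t a = fixed_const(1.5);
>     fixed_pt_t b = fixed_const(2.0);
>     // The double-width product is shifted back down to renormalize;
>     // prints 3.000000.
>     printf("%f\n", (double)fixed_mul(a, b) / ONE);
>     // Division pre-shifts the dividend instead; prints 0.750000.
>     printf("%f\n", (double)fixed_div(a, b) / ONE);
>     return 0;
>   }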
>
> Index: libitm/libitm_i.h
> ===================================================================
> --- libitm/libitm_i.h	(revision 219316)
> +++ libitm/libitm_i.h	(working copy)
> @@ -242,6 +242,9 @@ struct gtm_thread
>    uint32_t restart_reason[NUM_RESTARTS];
>    uint32_t restart_total;
>
> +  // Keeps track of how many transactions were successfully executed.
> +  uint64_t number_executed_txns;
> +
>    // *** The shared part of gtm_thread starts here. ***
>    // Shared state is on separate cachelines to avoid false sharing with
>    // thread-local parts of gtm_thread.
> @@ -286,6 +289,8 @@ struct gtm_thread
>    static void *operator new(size_t);
>    static void operator delete(void *);
>
> +  static void reoptimize_htm_execution();
> +
>    // Invoked from assembly language, thus the "asm" specifier on
>    // the name, avoiding complex name mangling.
>    static uint32_t begin_transaction(uint32_t, const gtm_jmpbuf *)
> @@ -309,6 +314,59 @@ struct gtm_thread
>    void commit_user_actions ();
>  };
>
> +// Different ways to deal with capacity aborts in HTM execution.
> +enum gtm_capacity_abort_mode
> +{
> +  STUBBORN,
> +  HALVEN,
> +  GIVEUP
> +};
> +
> +// Definition of fixed point arithmetic types.
> +// Half the bits are dedicated to the fractional part, and the rest to the
> +// "whole" part.
> +#define FIXED_PT_WIDTH 64
> +#define FIXED_PT_INTEGER_WIDTH 32
> +typedef uint64_t fixed_pt_t;
> +typedef __uint128_t fixed_pt_td;
> +
> +#define FIXED_PT_FRAC_WIDTH (FIXED_PT_WIDTH - FIXED_PT_INTEGER_WIDTH)
> +#define FIXED_PT_ONE ((fixed_pt_t)((fixed_pt_t)1 << FIXED_PT_FRAC_WIDTH))
> +#define FIXED_PT_TWO (FIXED_PT_ONE + FIXED_PT_ONE)
> +
> +#define fixed_mul(A,B) \
> +  ((fixed_pt_t)(((fixed_pt_td)(A) * (fixed_pt_td)(B)) >> FIXED_PT_FRAC_WIDTH))
> +#define fixed_div(A,B) \
> +  ((fixed_pt_t)(((fixed_pt_td)(A) << FIXED_PT_FRAC_WIDTH) / (fixed_pt_td)(B)))
> +#define fixed_const(R) \
> +  ((fixed_pt_t)((R) * FIXED_PT_ONE + ((R) >= 0 ? 0.5 : -0.5)))
> +
> +// Maintains the current values optimized for HTM execution and the
> +// corresponding statistics gathered for the decision-making.
> +struct gtm_global_optimizer
> +{
> +  // Mode chosen to currently deal with capacity aborts.
> +  gtm_capacity_abort_mode optimized_mode;
> +  // Number of attempts chosen to currently insist on HTM execution.
> +  uint32_t optimized_attempts;
> +
> +  uint64_t last_cycles;
> +  uint64_t last_total_txs_executed;
> +
> +  fixed_pt_t last_throughput;
> +  uint32_t last_attempts;
> +
> +  fixed_pt_t best_ever_throughput;
> +  uint32_t best_ever_attempts;
> +
> +  uint64_t txns_while_stubborn;
> +  uint64_t cycles_while_stubborn;
> +  uint64_t txns_while_halven;
> +  uint64_t cycles_while_halven;
> +  uint64_t txns_while_giveup;
> +  uint64_t cycles_while_giveup;
> +};
> +
>  } // namespace GTM
>
>  #include "tls.h"
> @@ -346,6 +404,9 @@ extern gtm_cacheline_mask gtm_mask_stack(gtm_cache
>  // the name, avoiding complex name mangling.
>  extern uint32_t htm_fastpath __asm__(UPFX "gtm_htm_fastpath");
>
> +// Maintains the optimization for HTM execution.
> +extern gtm_global_optimizer optimizer;
> +
>  } // namespace GTM
>
>  #endif // LIBITM_I_H
> Index: libitm/libitm.h
> ===================================================================
> --- libitm/libitm.h	(revision 219316)
> +++ libitm/libitm.h	(working copy)
> @@ -101,7 +101,8 @@ typedef enum
>    /* These are not part of the ABI but used for custom HTM fast paths. See
>       ITM_beginTransaction and gtm_thread::begin_transaction. */
>    pr_HTMRetryableAbort = 0x800000,
> -  pr_HTMRetriedAfterAbort = 0x1000000
> +  pr_HTMRetriedAfterAbort = 0x1000000,
> +  pr_HTMCapacityAbort = 0x2000000
>  } _ITM_codeProperties;
>
>  /* Result from startTransaction that describes what actions to take.
> Index: libitm/method-serial.cc
> ===================================================================
> --- libitm/method-serial.cc	(revision 219316)
> +++ libitm/method-serial.cc	(working copy)
> @@ -223,7 +223,23 @@ struct htm_mg : public method_group
>      // initially disabled.
>  #ifdef USE_HTM_FASTPATH
>      htm_fastpath = htm_init();
> +#ifdef HAVE_AS_RTM
> +    optimizer.optimized_mode = STUBBORN;
> +    optimizer.optimized_attempts = htm_fastpath;
> +    optimizer.last_cycles = rdtsc();
> +    optimizer.last_total_txs_executed = 0;
> +    optimizer.last_throughput = fixed_const(0.0001);
> +    optimizer.last_attempts = htm_fastpath > 0 ? htm_fastpath - 1 : 1;
> +    optimizer.best_ever_throughput = optimizer.last_throughput;
> +    optimizer.best_ever_attempts = htm_fastpath;
> +    optimizer.txns_while_stubborn = 1;
> +    optimizer.cycles_while_stubborn = 100;
> +    optimizer.txns_while_halven = 1;
> +    optimizer.cycles_while_halven = 100;
> +    optimizer.txns_while_giveup = 1;
> +    optimizer.cycles_while_giveup = 100;
>  #endif
> +#endif
>    }
>    virtual void fini()
>    {
> Index: libitm/beginend.cc
> ===================================================================
> --- libitm/beginend.cc	(revision 219316)
> +++ libitm/beginend.cc	(working copy)
> @@ -25,7 +25,6 @@
>  #include "libitm_i.h"
>  #include <pthread.h>
>
> -
>  using namespace GTM;
>
>  #if !defined(HAVE_ARCH_GTM_THREAD) || !defined(HAVE_ARCH_GTM_THREAD_DISP)
> @@ -39,6 +38,9 @@ unsigned GTM::gtm_thread::number_of_threads = 0;
>  gtm_stmlock GTM::gtm_stmlock_array[LOCK_ARRAY_SIZE];
>  atomic<gtm_version> GTM::gtm_clock;
>
> +// Optimization of HTM executions.
> +gtm_global_optimizer GTM::optimizer;
> +
>  /* ??? Move elsewhere when we figure out library initialization.  */
>  uint64_t GTM::gtm_spin_count_var = 1000;
>
> @@ -149,6 +151,225 @@ choose_code_path(uint32_t prop, abi_dispatch *disp
>    return a_runInstrumentedCode;
>  }
>
> +static inline fixed_pt_t
> +fixed_sqrt(fixed_pt_t n)
> +{
> +  int invert = 0;
> +  int iter = FIXED_PT_FRAC_WIDTH;
> +  int l, i;
> +
> +  if (n == 0 || n == FIXED_PT_ONE)
> +    {
> +      return n;
> +    }
> +  if (n < FIXED_PT_ONE && n > 6)
> +    {
> +      invert = 1;
> +      n = fixed_div(FIXED_PT_ONE, n);
> +    }
> +  if (n > FIXED_PT_ONE)
> +    {
> +      int s = n;
> +      iter = 0;
> +      while (s > 0)
> +        {
> +          s >>= 2;
> +          iter++;
> +        }
> +    }
> +
> +  l = (n >> 1) + 1;
> +  for (i = 0; i < iter; i++)
> +    {
> +      l = (l + fixed_div(n, l)) >> 1;
> +    }
> +  if (invert)
> +    {
> +      return (fixed_div(FIXED_PT_ONE, l));
> +    }
> +  return l;
> +}
> +
> +static inline fixed_pt_t
> +fixed_ln(fixed_pt_t x)
> +{
> +  const fixed_pt_t LN2 = fixed_const(0.69314718055994530942);
> +  const fixed_pt_t LG[7] = {
> +    fixed_const(6.666666666666735130e-01),
> +    fixed_const(3.999999999940941908e-01),
> +    fixed_const(2.857142874366239149e-01),
> +    fixed_const(2.222219843214978396e-01),
> +    fixed_const(1.818357216161805012e-01),
> +    fixed_const(1.531383769920937332e-01),
> +    fixed_const(1.479819860511658591e-01)
> +  };
> +
> +  if (x == 0)
> +    {
> +      return 0xffffffff;
> +    }
> +
> +  fixed_pt_t log2 = 0;
> +  fixed_pt_t xi = x;
> +  while (xi > FIXED_PT_TWO)
> +    {
> +      xi >>= 1;
> +      log2++;
> +    }
> +
> +  fixed_pt_t f = xi - FIXED_PT_ONE;
> +  fixed_pt_t s = fixed_div(f, FIXED_PT_TWO + f);
> +  fixed_pt_t z = fixed_mul(s, s);
> +  fixed_pt_t w = fixed_mul(z, z);
> +  fixed_pt_t R =
> +    fixed_mul(w, LG[1] + fixed_mul(w, LG[3] + fixed_mul(w, LG[5])))
> +    + fixed_mul(z, LG[0] + fixed_mul(w, LG[2]
> +        + fixed_mul(w, LG[4] + fixed_mul(w, LG[6]))));
> +  return fixed_mul(LN2, (log2 << FIXED_PT_FRAC_WIDTH))
> +    + f - fixed_mul(s, f - R);
> +}
> +
> +// State not synchronized; assumes a single thread uses it at any time.
> +// Invoked only by the thread reoptimizing, so the assumption holds.
> +int
> +obtainRandomInt(int max)
> +{
> +  static uint32_t seed = 123456789;
> +  // Linear congruential step; the uint32_t type makes the reduction
> +  // modulo 2^32 implicit.
> +  seed = 1103515245 * seed + 12345;
> +  return seed % max;
> +}
> +
> +// Called by the thread at the tail of the list of threads.
> +void
> +GTM::gtm_thread::reoptimize_htm_execution()
> +{
> +  gtm_thread *tx = gtm_thr();
> +  uint64_t total_txs_executed = 0;
> +
> +  // Collect the statistics obtained so far.
> +  serial_lock.read_lock(tx);
> +  gtm_thread *ptr = list_of_threads;
> +  for (; ptr; ptr = ptr->next_thread)
> +    {
> +      total_txs_executed += ptr->number_executed_txns;
> +    }
> +  serial_lock.read_unlock(tx);
> +
> +  // Obtain the delta performance with respect to the last period.
> +  const uint64_t current_cycles = rdtsc();
> +  const uint64_t cycles_used = current_cycles - optimizer.last_cycles;
> +  const uint64_t new_txs_executed =
> +    total_txs_executed - optimizer.last_total_txs_executed;
> +  optimizer.last_cycles = current_cycles;
> +  optimizer.last_total_txs_executed = total_txs_executed;
> +  if (optimizer.optimized_mode == STUBBORN)
> +    {
> +      optimizer.txns_while_stubborn += new_txs_executed;
> +      optimizer.cycles_while_stubborn += cycles_used;
> +    }
> +  else if (optimizer.optimized_mode == HALVEN)
> +    {
> +      optimizer.txns_while_halven += new_txs_executed;
> +      optimizer.cycles_while_halven += cycles_used;
> +    }
> +  else
> +    {
> +      optimizer.txns_while_giveup += new_txs_executed;
> +      optimizer.cycles_while_giveup += cycles_used;
> +    }
> +
> +  // Compute Upper Confidence Bounds for the mode to choose next.
> +  // Use fixed point arithmetic types to spare floating point usage.
> +  const fixed_pt_t log_sum =
> +    fixed_mul(FIXED_PT_TWO,
> +              fixed_ln(fixed_const(optimizer.txns_while_stubborn)
> +                       + fixed_const(optimizer.txns_while_halven)
> +                       + fixed_const(optimizer.txns_while_giveup)));
> +  const fixed_pt_t ucb_stubborn =
> +    fixed_div(fixed_div(fixed_const(optimizer.txns_while_stubborn),
> +                        fixed_const(optimizer.cycles_while_stubborn)),
> +              optimizer.best_ever_throughput)
> +    + fixed_sqrt(fixed_div(log_sum,
> +                           fixed_const(optimizer.txns_while_stubborn)));
> +  const fixed_pt_t ucb_halven =
> +    fixed_div(fixed_div(fixed_const(optimizer.txns_while_halven),
> +                        fixed_const(optimizer.cycles_while_halven)),
> +              optimizer.best_ever_throughput)
> +    + fixed_sqrt(fixed_div(log_sum,
> +                           fixed_const(optimizer.txns_while_halven)));
> +  const fixed_pt_t ucb_giveup =
> +    fixed_div(fixed_div(fixed_const(optimizer.txns_while_giveup),
> +                        fixed_const(optimizer.cycles_while_giveup)),
> +              optimizer.best_ever_throughput)
> +    + fixed_sqrt(fixed_div(log_sum,
> +                           fixed_const(optimizer.txns_while_giveup)));
> +
> +  if (ucb_stubborn > ucb_halven && ucb_stubborn > ucb_giveup)
> +    {
> +      optimizer.optimized_mode = STUBBORN;
> +    }
> +  else if (ucb_halven > ucb_giveup)
> +    {
> +      optimizer.optimized_mode = HALVEN;
> +    }
> +  else
> +    {
> +      optimizer.optimized_mode = GIVEUP;
> +    }
> +
> +  // Compute gradient descent for the number of retries.
> +  const fixed_pt_t current_throughput =
> +    fixed_div(fixed_const(new_txs_executed), fixed_const(cycles_used));
> +  const fixed_pt_t change_for_better =
> +    fixed_div(current_throughput, optimizer.last_throughput);
> +  const fixed_pt_t change_for_worse =
> +    fixed_div(optimizer.last_throughput, current_throughput);
> +  int32_t last_attempts = optimizer.last_attempts;
> +  int32_t current_attempts = optimizer.optimized_attempts;
> +  int32_t new_attempts = current_attempts;
> +  if (unlikely(change_for_worse > fixed_const(1.40)))
> +    {
> +      optimizer.optimized_attempts = optimizer.best_ever_attempts;
> +      optimizer.last_throughput = current_throughput;
> +      optimizer.last_attempts = current_attempts;
> +      return;
> +    }
> +
> +  if (unlikely(obtainRandomInt(100) == 0))
> +    {
> +      optimizer.last_attempts = optimizer.optimized_attempts;
> +      optimizer.last_throughput = current_throughput;
> +      optimizer.optimized_attempts = obtainRandomInt(18) + 2;
> +      return;
> +    }
> +
> +  if (change_for_better > fixed_const(1.05))
> +    {
> +      new_attempts += current_attempts - last_attempts;
> +      if (current_attempts == last_attempts)
> +        new_attempts += obtainRandomInt(2) == 0 ? -1 : 1;
> +    }
> +  else if (change_for_worse > fixed_const(1.05))
> +    {
> +      new_attempts -= current_attempts - last_attempts;
> +      if (current_attempts == last_attempts)
> +        new_attempts += obtainRandomInt(2) == 0 ? -1 : 1;
> +    }
> +  if (unlikely(new_attempts > 20))
> +    new_attempts = 20;
> +  else if (unlikely(new_attempts < 2))
> +    new_attempts = 2;
> +  if (current_attempts != new_attempts)
> +    {
> +      optimizer.last_attempts = current_attempts;
> +      optimizer.last_throughput = current_throughput;
> +    }
> +  optimizer.optimized_attempts = new_attempts;
> +  if (unlikely(current_throughput > optimizer.best_ever_throughput))
> +    {
> +      optimizer.best_ever_throughput = current_throughput;
> +      optimizer.best_ever_attempts = current_attempts;
> +    }
> +}
> +
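>
> A note for reviewers (my summary, not code from the patch): the
> scores computed above follow the standard UCB1 rule from the
> multi-armed-bandit literature.  With t_m transactions committed over
> c_m cycles in mode m, and T = t_stubborn + t_halven + t_giveup, each
> mode is scored as
>
>   ucb_m = (t_m / c_m) / best_ever_throughput + sqrt(2 * ln(T) / t_m)
>
> i.e. throughput normalized against the best seen so far, plus an
> exploration bonus that shrinks as a mode accumulates samples.  The
> mode with the highest score is used for the next window of
> transactions.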
>  uint32_t
>  GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
>  {
> @@ -190,7 +411,7 @@ GTM::gtm_thread::begin_transaction (uint32_t prop,
>  #ifndef HTM_CUSTOM_FASTPATH
>    if (likely(htm_fastpath && (prop & pr_hasNoAbort)))
>      {
> -      for (uint32_t t = htm_fastpath; t; t--)
> +      for (uint32_t t = optimizer.optimized_attempts; t; t--)
>  	{
>  	  uint32_t ret = htm_begin();
>  	  if (htm_begin_success(ret))
> @@ -209,19 +430,36 @@ GTM::gtm_thread::begin_transaction (uint32_t prop,
>  	    }
>  	  // The transaction has aborted.  Don't retry if it's unlikely that
>  	  // retrying the transaction will be successful.
> -	  if (!htm_abort_should_retry(ret))
> +#ifdef HAVE_AS_RTM
> +	  if (htm_is_capacity_abort(ret))
> +	    {
> +	      gtm_capacity_abort_mode capacity_mode = optimizer.optimized_mode;
> +	      if (capacity_mode == HALVEN)
> +		t = t / 2 + 1;
> +	      else if (capacity_mode == GIVEUP)
> +		t = 1;
> +	    }
> +	  // Only one thread performs this to avoid the need for
> +	  // synchronization.  We use the oldest thread that is active.
> +	  // We also reoptimize at most once per transaction.
> +	  if (unlikely(tx->next_thread == NULL
> +		       && tx->number_executed_txns % 500 == 0 && t == 1))
> +	    reoptimize_htm_execution();
> +#else
> +	  if (!htm_abort_should_retry(ret))
>  	    break;
> +#endif
>  	  // Wait until any concurrent serial-mode transactions have finished.
>  	  // This is an empty critical section, but won't be elided.
>  	  if (serial_lock.is_write_locked())
>  	    {
> -	       tx = gtm_thr();
> -	       if (unlikely(tx == NULL))
> -	         {
> -	           // See below.
> -	           tx = new gtm_thread();
> -	           set_gtm_thr(tx);
> -	         }
> +	      tx = gtm_thr();
> +	      if (unlikely(tx == NULL))
> +		{
> +		  // See below.
> +		  tx = new gtm_thread();
> +		  set_gtm_thr(tx);
> +		}
>  	      // Check whether there is an enclosing serial-mode transaction;
>  	      // if so, we just continue as a nested transaction and don't
>  	      // try to use the HTM fastpath.  This case can happen when an
> @@ -262,12 +500,26 @@ GTM::gtm_thread::begin_transaction (uint32_t prop,
>        // other fallback will use serial transactions, which don't use
>        // restart_total but will reset it when committing.
>        if (!(prop & pr_HTMRetriedAfterAbort))
> -	tx->restart_total = htm_fastpath;
> +	tx->restart_total = optimizer.optimized_attempts;
>
>        if (--tx->restart_total > 0)
>  	{
>  	  // Wait until any concurrent serial-mode transactions have finished.
>  	  // Essentially the same code as above.
> +#ifdef HAVE_AS_RTM
> +	  if (prop & pr_HTMCapacityAbort)
> +	    {
> +	      gtm_capacity_abort_mode capacity_mode = optimizer.optimized_mode;
> +	      if (capacity_mode == HALVEN)
> +		tx->restart_total = tx->restart_total / 2;
> +	      else if (capacity_mode == GIVEUP)
> +		goto stop_custom_htm_fastpath;
> +	    }
> +
> +	  if (unlikely(tx->next_thread == NULL
> +		       && tx->number_executed_txns % 500 == 0
> +		       && tx->restart_total == 1))
> +	    reoptimize_htm_execution();
> +#endif
>  	  if (serial_lock.is_write_locked())
>  	    {
>  	      if (tx->nesting > 0)
> @@ -665,12 +917,21 @@ _ITM_commitTransaction(void)
>    if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
>      {
>        htm_commit();
> +      gtm_thread *tx = gtm_thr();
> +      if (unlikely(tx == NULL))
> +	{
> +	  tx = new gtm_thread();
> +	  set_gtm_thr(tx);
> +	}
> +      tx->number_executed_txns++;
>        return;
>      }
>  #endif
>    gtm_thread *tx = gtm_thr();
>    if (!tx->trycommit ())
>      tx->restart (RESTART_VALIDATE_COMMIT);
> +
> +  tx->number_executed_txns++;
>  }
>
>  void ITM_REGPARM
> @@ -681,6 +942,13 @@ _ITM_commitTransactionEH(void *exc_ptr)
>    if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
>      {
>        htm_commit();
> +      gtm_thread *tx = gtm_thr();
> +      if (unlikely(tx == NULL))
> +	{
> +	  tx = new gtm_thread();
> +	  set_gtm_thr(tx);
> +	}
> +      tx->number_executed_txns++;
>        return;
>      }
>  #endif
> @@ -690,4 +958,5 @@ _ITM_commitTransactionEH(void *exc_ptr)
>        tx->eh_in_flight = exc_ptr;
>        tx->restart (RESTART_VALIDATE_COMMIT);
>      }
> +  tx->number_executed_txns++;
>  }
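>
> To summarize the retry policy that the non-custom fast path follows
> after this change, here is a simplified sketch of the loop above (not
> compilable on its own; it elides the serial-lock waits and thread
> setup):
>
>   for (uint32_t t = optimizer.optimized_attempts; t; t--)
>     {
>       uint32_t ret = htm_begin();
>       if (htm_begin_success(ret))
>         return;                  // Run transactionally in hardware.
>       if (htm_is_capacity_abort(ret))
>         {
>           if (optimizer.optimized_mode == HALVEN)
>             t = t / 2 + 1;       // Halve the remaining budget.
>           else if (optimizer.optimized_mode == GIVEUP)
>             t = 1;               // Loop exits after this decrement.
>         }
>       // STUBBORN just keeps retrying until the budget runs out.
>     }
>   // Budget exhausted: fall back to the software path.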
> Index: libitm/config/x86/target.h
> ===================================================================
> --- libitm/config/x86/target.h	(revision 219316)
> +++ libitm/config/x86/target.h	(working copy)
> @@ -126,12 +126,25 @@ htm_abort_should_retry (uint32_t begin_ret)
>    return begin_ret & _XABORT_RETRY;
>  }
>
> +static inline bool
> +htm_is_capacity_abort(uint32_t begin_ret)
> +{
> +  return begin_ret & _XABORT_CAPACITY;
> +}
> +
>  /* Returns true iff a hardware transaction is currently being executed.  */
>  static inline bool
>  htm_transaction_active ()
>  {
>    return _xtest() != 0;
>  }
> +
> +static inline uint64_t rdtsc()
> +{
> +  uint32_t hi, lo;
> +  __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
> +  return ((uint64_t)lo) | (((uint64_t)hi) << 32);
> +}
>  #endif
>
>
> Index: libitm/config/x86/sjlj.S
> ===================================================================
> --- libitm/config/x86/sjlj.S	(revision 219316)
> +++ libitm/config/x86/sjlj.S	(working copy)
> @@ -59,12 +59,14 @@
>  #define pr_hasNoAbort		0x08
>  #define pr_HTMRetryableAbort	0x800000
>  #define pr_HTMRetriedAfterAbort	0x1000000
> +#define pr_HTMCapacityAbort	0x2000000
>  #define a_runInstrumentedCode	0x01
>  #define a_runUninstrumentedCode	0x02
>  #define a_tryHTMFastPath	0x20
>
>  #define _XABORT_EXPLICIT	(1 << 0)
>  #define _XABORT_RETRY		(1 << 1)
> +#define _XABORT_CAPACITY	(1 << 3)
>
>  	.text
>
> @@ -108,9 +110,12 @@ SYM(_ITM_beginTransaction):
>  .Ltxn_abort:
>  	/* If it might make sense to retry the HTM fast path, let the C++
>  	   code decide.  */
> -	testl	$(_XABORT_RETRY|_XABORT_EXPLICIT), %eax
> +	testl	$(_XABORT_RETRY|_XABORT_EXPLICIT|_XABORT_CAPACITY), %eax
>  	jz	.Lno_htm
>  	orl	$pr_HTMRetryableAbort, %edi
> +	testl	$(_XABORT_CAPACITY), %eax
> +	jz	.Lno_htm
> +	orl	$pr_HTMCapacityAbort, %edi
>  	/* Let the C++ code handle the retry policy.  */
>  .Lno_htm:
>  #endif
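>
> As a concrete (illustrative) walk-through of the hill-climbing step
> in reoptimize_htm_execution: suppose the last change moved the retry
> budget from 8 to 10 attempts.  If throughput since then improved by
> more than 5%, the step continues in the same direction and tries 12;
> if it degraded by more than 5%, it steps back towards 8; if it
> collapsed by more than 40%, the budget reverts outright to the best
> configuration ever seen.  If the budget did not change last time, a
> coin flip perturbs it by +/-1, and with probability 1/100 the budget
> instead jumps to a random value in [2, 20) to escape local maxima.
> The budget is always clamped to [2, 20].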
>
> On Fri, Apr 10, 2015 at 8:24 AM, Andi Kleen <a...@firstfloor.org> wrote:
>> Nuno Diegues <n...@ist.utl.pt> writes:
>>>
>>> One general question on how to proceed: given that I make some
>>> further changes, should I post the whole patch again?
>>
>> Yes please resend the patch.
>>
>> -Andi

--
a...@linux.intel.com -- Speaking for myself only