https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68398

            Bug ID: 68398
           Summary: coremark regression due to r229685
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: spop at gcc dot gnu.org
  Target Milestone: ---

We have seen a performance regression due to r229685.
We see fewer FSM jump threads on the reduced testcase.

CC=2015-11-02-23-23-28-d3063db-trunk/bin/gcc
$CC -O3 m.c -fdump-tree-dom1-details=a -o a.out
CC=2015-11-02-23-25-06-f497d67-trunk/bin/gcc
$CC -O3 m.c -fdump-tree-dom1-details=b -o b.out

$ grep FSM a | wc -l
17

$ grep FSM b | wc -l
15

on x86_64 valgrind indicates that with the patch we have 2.5% more instructions
executed:

+ valgrind --dsymutil=yes --tool=callgrind --callgrind-out-file=a.call ./a.out
==27524== Callgrind, a call-graph generating cache profiler
==27524== Copyright (C) 2002-2013, and GNU GPL'd, by Josef Weidendorfer et al.
==27524== Using Valgrind-3.10.0.SVN and LibVEX; rerun with -h for copyright
info
==27524== Command: ./a.out
==27524== 
==27524== For interactive control, run 'callgrind_control -h'.
==27524== 
==27524== Events    : Ir
==27524== Collected : 209839882
==27524== 
==27524== I   refs:      209,839,882
+ valgrind --dsymutil=yes --tool=callgrind --callgrind-out-file=b.call ./b.out
==27585== Callgrind, a call-graph generating cache profiler
==27585== Copyright (C) 2002-2013, and GNU GPL'd, by Josef Weidendorfer et al.
==27585== Using Valgrind-3.10.0.SVN and LibVEX; rerun with -h for copyright
info
==27585== Command: ./b.out
==27585== 
==27585== For interactive control, run 'callgrind_control -h'.
==27585== 
==27585== Events    : Ir
==27585== Collected : 213154557
==27585== 
==27585== I   refs:      213,154,557
+ callgrind_annotate a.call
--------------------------------------------------------------------------------
Profile data file 'a.call' (creator: callgrind-3.10.0.SVN)
--------------------------------------------------------------------------------
I1 cache: 
D1 cache: 
LL cache: 
Timerange: Basic block 0 - 46055772
Trigger: Program termination
Profiled target:  ./a.out (PID 27524, part 1)
Events recorded:  Ir
Events shown:     Ir
Event sort order: Ir
Thresholds:       99
Include dirs:     
User annotated:   
Auto-annotation:  off

--------------------------------------------------------------------------------
         Ir 
--------------------------------------------------------------------------------
209,839,882  PROGRAM TOTALS

--------------------------------------------------------------------------------
         Ir  file:function
--------------------------------------------------------------------------------
138,250,035  ???:core_bench_list [a.out]
 69,160,889  ???:core_list_mergesort.constprop.2 [a.out]
  2,309,860  ???:core_list_init [a.out]

+ callgrind_annotate b.call
--------------------------------------------------------------------------------
Profile data file 'b.call' (creator: callgrind-3.10.0.SVN)
--------------------------------------------------------------------------------
I1 cache: 
D1 cache: 
LL cache: 
Timerange: Basic block 0 - 48409229
Trigger: Program termination
Profiled target:  ./b.out (PID 27585, part 1)
Events recorded:  Ir
Events shown:     Ir
Event sort order: Ir
Thresholds:       99
Include dirs:     
User annotated:   
Auto-annotation:  off

--------------------------------------------------------------------------------
         Ir 
--------------------------------------------------------------------------------
213,154,557  PROGRAM TOTALS

--------------------------------------------------------------------------------
         Ir  file:function
--------------------------------------------------------------------------------
138,845,638  ???:core_bench_list [b.out]
 71,879,961  ???:core_list_mergesort.constprop.2 [b.out]
  2,309,860  ???:core_list_init [b.out]

$ cat m.c
typedef struct list_data_s {
  short data16;
  short idx;
} list_data;

typedef struct list_head_s {
  struct list_head_s *next;
  struct list_data_s *info;
} list_head;

list_head *core_list_find(list_head *list,list_data *info);
list_head *core_list_reverse(list_head *list);
list_head *core_list_remove(list_head *item);
list_head *core_list_undo_remove(list_head *item_removed, list_head
*item_modified);
list_head *core_list_insert_new(list_head *insert_point
                                , list_data *info, list_head **memblock,
list_data **datablock
                                , list_head *memblock_end, list_data
*datablock_end);
typedef int(*list_cmp)(list_data *a, list_data *b);
list_head *core_list_mergesort(list_head *list, list_cmp cmp);

short state_scores[4] = {-29126, 24894, -24736, -272};

short matrix_scores[4] = {8151, -30381, -32453, 11169};

unsigned state_idx = 0, matrix_idx = 0;

short calc_func(short *pdata) {
  short data=*pdata;
  short retval;
  unsigned char optype=(data>>7) & 1; 
  if (optype) 
    return (data & 0x007f);
  else { 
    short flag=data & 0x7; 
    short dtype=((data>>3) & 0xf); 
    dtype |= dtype << 4; 
    switch (flag) {
    case 0:
      if (dtype<0x22) 
        dtype=0x22;
      retval=state_scores[state_idx++];
      break;
    case 1:
      retval=matrix_scores[matrix_idx++];
      break;
    default:
      retval=data;
      break;
    }
    retval &= 0x007f;
    *pdata = (data & 0xff00) | 0x0080 | retval; 
    return retval;
  }
}


int cmp_complex(list_data *a, list_data *b) {
  short val1=calc_func(&(a->data16));
  short val2=calc_func(&(b->data16));
  return val1 - val2;
}


int cmp_idx(list_data *a, list_data *b) {
  a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16>>8));
  b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16>>8));
  return a->idx - b->idx;
}

void copy_info(list_data *to,list_data *from) {
  to->data16=from->data16;
  to->idx=from->idx;
}


unsigned short core_bench_list(list_head *list, short find_num, short
finder_idx) {
  unsigned short retval=0;
  unsigned short found=0,missed=0;
  list_head *this_find;
  list_head *finder, *remover;
  list_data info;
  short i;

  info.idx=finder_idx;

  for (i=0; i<find_num; i++) {
    info.data16= (i & 0xff) ;
    this_find=core_list_find(list,&info);
    list=core_list_reverse(list);
    if (this_find==0) {
      missed++;
      retval+=(list->next->info->data16 >> 8) & 1;
    }
    else {
      found++;
      if (this_find->info->data16 & 0x1) 
        retval+=(this_find->info->data16 >> 9) & 1;

      if (this_find->next != 0) {
        finder = this_find->next;
        this_find->next = finder->next;
        finder->next=list->next;
        list->next=finder;
      }
    }
    if (info.idx>=0)
      info.idx++;
  }
  retval+=found*4-missed;

  if (finder_idx>0)
    list=core_list_mergesort(list,cmp_complex);
  remover=core_list_remove(list->next);

  finder=core_list_find(list,&info);
  if (!finder)
    finder=list->next;
  while (finder) {
    finder=finder->next;
  }
  remover=core_list_undo_remove(remover,list->next);

  list=core_list_mergesort(list,cmp_idx);

  finder=list->next;
  while (finder) {
    finder=finder->next;
  }
  return retval;
}

list_head *core_list_init(unsigned blksize, list_head *memblock, short seed) {

  unsigned per_item=16+sizeof(struct list_data_s);
  unsigned size=(blksize/per_item)-2; 
  list_head *memblock_end=memblock+size;
  list_data *datablock=(list_data *)(memblock_end);
  list_data *datablock_end=datablock+size;

  unsigned i;
  list_head *finder,*list=memblock;
  list_data info;


  list->next=0;
  list->info=datablock;
  list->info->idx=0x0000;
  list->info->data16=(short)0x8080;
  memblock++;
  datablock++;
  info.idx=0x7fff;
  info.data16=(short)0xffff;
 
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end);


  for (i=0; i<size; i++) {
    unsigned short datpat=((unsigned short)(seed^i) & 0xf);
    unsigned short dat=(datpat<<3) | (i&0x7); 
    info.data16=(dat<<8) | dat;         
   
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end);
  }

  finder=list->next;
  i=1;
  while (finder->next!=0) {
    if (i<size/5) 
      finder->info->idx=i++;
    else {
      unsigned short pat=(unsigned short)(i++ ^ seed); 
      finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); 
    }
    finder=finder->next;
  }
  list = core_list_mergesort(list,cmp_idx);
  return list;
}


list_head *core_list_insert_new(list_head *insert_point, list_data *info,
list_head **memblock, list_data **datablock
                                , list_head *memblock_end, list_data
*datablock_end) {
  list_head *newitem;

  if ((*memblock+1) >= memblock_end)
    return 0;
  if ((*datablock+1) >= datablock_end)
    return 0;

  newitem=*memblock;
  (*memblock)++;
  newitem->next=insert_point->next;
  insert_point->next=newitem;

  newitem->info=*datablock;
  (*datablock)++;
  copy_info(newitem->info,info);

  return newitem;
}


list_head *core_list_remove(list_head *item) {
  list_data *tmp;
  list_head *ret=item->next;

  tmp=item->info;
  item->info=ret->info;
  ret->info=tmp;

  item->next=item->next->next;
  ret->next=0;
  return ret;
}


list_head *core_list_undo_remove(list_head *item_removed, list_head
*item_modified) {
  list_data *tmp;

  tmp=item_removed->info;
  item_removed->info=item_modified->info;
  item_modified->info=tmp;

  item_removed->next=item_modified->next;
  item_modified->next=item_removed;
  return item_removed;
}


list_head *core_list_find(list_head *list,list_data *info) {
  if (info->idx>=0) {
    while (list && (list->info->idx != info->idx))
      list=list->next;
    return list;
  } else {
    while (list && ((list->info->data16 & 0xff) != info->data16))
      list=list->next;
    return list;
  }
}


list_head *core_list_reverse(list_head *list) {
  list_head *next=0, *tmp;
  while (list) {
    tmp=list->next;
    list->next=next;
    next=list;
    list=tmp;
  }
  return next;
}

list_head *core_list_mergesort(list_head *list, list_cmp cmp) {
  list_head *p, *q, *e, *tail;
  int insize, nmerges, psize, qsize, i;

  insize = 1;

  while (1) {
    p = list;
    list = 0;
    tail = 0;

    nmerges = 0;  

    while (p) {
      nmerges++;  

      q = p;
      psize = 0;
      for (i = 0; i < insize; i++) {
        psize++;
        q = q->next;
        if (!q) break;
      }


      qsize = insize;


      while (psize > 0 || (qsize > 0 && q)) {


        if (psize == 0) {

          e = q; q = q->next; qsize--;
        } else if (qsize == 0 || !q) {

          e = p; p = p->next; psize--;
        } else if (cmp(p->info,q->info) <= 0) {

          e = p; p = p->next; psize--;
        } else {

          e = q; q = q->next; qsize--;
        }


        if (tail) {
          tail->next = e;
        } else {
          list = e;
        }
        tail = e;
      }


      p = q;
    }

    tail->next = 0;


    if (nmerges <= 1)   
      return list;


    insize *= 2;
  }
  return list;
}

#define N 1000000
unsigned char pp[N];
int main (int argc, char *argv[]) {

  list_head *head = core_list_init(N, (list_head *)pp, 0);
  core_bench_list(head, 102, 1);
  core_bench_list(head, 102, -1);
  return 0;
}

Reply via email to