https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68398
Bug ID: 68398 Summary: coremark regression due to r229685 Product: gcc Version: unknown Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: spop at gcc dot gnu.org Target Milestone: --- We have seen a performance regression due to r229685. We see fewer FSM jump threads on the reduced testcase. CC=2015-11-02-23-23-28-d3063db-trunk/bin/gcc $CC -O3 m.c -fdump-tree-dom1-details=a -o a.out CC=2015-11-02-23-25-06-f497d67-trunk/bin/gcc $CC -O3 m.c -fdump-tree-dom1-details=b -o b.out $ grep FSM a | wc -l 17 $ grep FSM b | wc -l 15 on x86_64 valgrind indicates that with the patch we have 2.5% more instructions executed: + valgrind --dsymutil=yes --tool=callgrind --callgrind-out-file=a.call ./a.out ==27524== Callgrind, a call-graph generating cache profiler ==27524== Copyright (C) 2002-2013, and GNU GPL'd, by Josef Weidendorfer et al. ==27524== Using Valgrind-3.10.0.SVN and LibVEX; rerun with -h for copyright info ==27524== Command: ./a.out ==27524== ==27524== For interactive control, run 'callgrind_control -h'. ==27524== ==27524== Events : Ir ==27524== Collected : 209839882 ==27524== ==27524== I refs: 209,839,882 + valgrind --dsymutil=yes --tool=callgrind --callgrind-out-file=b.call ./b.out ==27585== Callgrind, a call-graph generating cache profiler ==27585== Copyright (C) 2002-2013, and GNU GPL'd, by Josef Weidendorfer et al. ==27585== Using Valgrind-3.10.0.SVN and LibVEX; rerun with -h for copyright info ==27585== Command: ./b.out ==27585== ==27585== For interactive control, run 'callgrind_control -h'. ==27585== ==27585== Events : Ir ==27585== Collected : 213154557 ==27585== ==27585== I refs: 213,154,557 + callgrind_annotate a.call -------------------------------------------------------------------------------- Profile data file 'a.call' (creator: callgrind-3.10.0.SVN) -------------------------------------------------------------------------------- I1 cache: D1 cache: LL cache: Timerange: Basic block 0 - 46055772 Trigger: Program termination Profiled target: ./a.out (PID 27524, part 1) Events recorded: Ir Events shown: Ir Event sort order: Ir Thresholds: 99 Include dirs: User annotated: Auto-annotation: off -------------------------------------------------------------------------------- Ir -------------------------------------------------------------------------------- 209,839,882 PROGRAM TOTALS -------------------------------------------------------------------------------- Ir file:function -------------------------------------------------------------------------------- 138,250,035 ???:core_bench_list [a.out] 69,160,889 ???:core_list_mergesort.constprop.2 [a.out] 2,309,860 ???:core_list_init [a.out] + callgrind_annotate b.call -------------------------------------------------------------------------------- Profile data file 'b.call' (creator: callgrind-3.10.0.SVN) -------------------------------------------------------------------------------- I1 cache: D1 cache: LL cache: Timerange: Basic block 0 - 48409229 Trigger: Program termination Profiled target: ./b.out (PID 27585, part 1) Events recorded: Ir Events shown: Ir Event sort order: Ir Thresholds: 99 Include dirs: User annotated: Auto-annotation: off -------------------------------------------------------------------------------- Ir -------------------------------------------------------------------------------- 213,154,557 PROGRAM TOTALS -------------------------------------------------------------------------------- Ir file:function -------------------------------------------------------------------------------- 138,845,638 ???:core_bench_list [b.out] 71,879,961 ???:core_list_mergesort.constprop.2 [b.out] 2,309,860 ???:core_list_init [b.out] $ cat m.c typedef struct list_data_s { short data16; short idx; } list_data; typedef struct list_head_s { struct list_head_s *next; struct list_data_s *info; } list_head; list_head *core_list_find(list_head *list,list_data *info); list_head *core_list_reverse(list_head *list); list_head *core_list_remove(list_head *item); list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified); list_head *core_list_insert_new(list_head *insert_point , list_data *info, list_head **memblock, list_data **datablock , list_head *memblock_end, list_data *datablock_end); typedef int(*list_cmp)(list_data *a, list_data *b); list_head *core_list_mergesort(list_head *list, list_cmp cmp); short state_scores[4] = {-29126, 24894, -24736, -272}; short matrix_scores[4] = {8151, -30381, -32453, 11169}; unsigned state_idx = 0, matrix_idx = 0; short calc_func(short *pdata) { short data=*pdata; short retval; unsigned char optype=(data>>7) & 1; if (optype) return (data & 0x007f); else { short flag=data & 0x7; short dtype=((data>>3) & 0xf); dtype |= dtype << 4; switch (flag) { case 0: if (dtype<0x22) dtype=0x22; retval=state_scores[state_idx++]; break; case 1: retval=matrix_scores[matrix_idx++]; break; default: retval=data; break; } retval &= 0x007f; *pdata = (data & 0xff00) | 0x0080 | retval; return retval; } } int cmp_complex(list_data *a, list_data *b) { short val1=calc_func(&(a->data16)); short val2=calc_func(&(b->data16)); return val1 - val2; } int cmp_idx(list_data *a, list_data *b) { a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16>>8)); b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16>>8)); return a->idx - b->idx; } void copy_info(list_data *to,list_data *from) { to->data16=from->data16; to->idx=from->idx; } unsigned short core_bench_list(list_head *list, short find_num, short finder_idx) { unsigned short retval=0; unsigned short found=0,missed=0; list_head *this_find; list_head *finder, *remover; list_data info; short i; info.idx=finder_idx; for (i=0; i<find_num; i++) { info.data16= (i & 0xff) ; this_find=core_list_find(list,&info); list=core_list_reverse(list); if (this_find==0) { missed++; retval+=(list->next->info->data16 >> 8) & 1; } else { found++; if (this_find->info->data16 & 0x1) retval+=(this_find->info->data16 >> 9) & 1; if (this_find->next != 0) { finder = this_find->next; this_find->next = finder->next; finder->next=list->next; list->next=finder; } } if (info.idx>=0) info.idx++; } retval+=found*4-missed; if (finder_idx>0) list=core_list_mergesort(list,cmp_complex); remover=core_list_remove(list->next); finder=core_list_find(list,&info); if (!finder) finder=list->next; while (finder) { finder=finder->next; } remover=core_list_undo_remove(remover,list->next); list=core_list_mergesort(list,cmp_idx); finder=list->next; while (finder) { finder=finder->next; } return retval; } list_head *core_list_init(unsigned blksize, list_head *memblock, short seed) { unsigned per_item=16+sizeof(struct list_data_s); unsigned size=(blksize/per_item)-2; list_head *memblock_end=memblock+size; list_data *datablock=(list_data *)(memblock_end); list_data *datablock_end=datablock+size; unsigned i; list_head *finder,*list=memblock; list_data info; list->next=0; list->info=datablock; list->info->idx=0x0000; list->info->data16=(short)0x8080; memblock++; datablock++; info.idx=0x7fff; info.data16=(short)0xffff; core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end); for (i=0; i<size; i++) { unsigned short datpat=((unsigned short)(seed^i) & 0xf); unsigned short dat=(datpat<<3) | (i&0x7); info.data16=(dat<<8) | dat; core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end); } finder=list->next; i=1; while (finder->next!=0) { if (i<size/5) finder->info->idx=i++; else { unsigned short pat=(unsigned short)(i++ ^ seed); finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); } finder=finder->next; } list = core_list_mergesort(list,cmp_idx); return list; } list_head *core_list_insert_new(list_head *insert_point, list_data *info, list_head **memblock, list_data **datablock , list_head *memblock_end, list_data *datablock_end) { list_head *newitem; if ((*memblock+1) >= memblock_end) return 0; if ((*datablock+1) >= datablock_end) return 0; newitem=*memblock; (*memblock)++; newitem->next=insert_point->next; insert_point->next=newitem; newitem->info=*datablock; (*datablock)++; copy_info(newitem->info,info); return newitem; } list_head *core_list_remove(list_head *item) { list_data *tmp; list_head *ret=item->next; tmp=item->info; item->info=ret->info; ret->info=tmp; item->next=item->next->next; ret->next=0; return ret; } list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified) { list_data *tmp; tmp=item_removed->info; item_removed->info=item_modified->info; item_modified->info=tmp; item_removed->next=item_modified->next; item_modified->next=item_removed; return item_removed; } list_head *core_list_find(list_head *list,list_data *info) { if (info->idx>=0) { while (list && (list->info->idx != info->idx)) list=list->next; return list; } else { while (list && ((list->info->data16 & 0xff) != info->data16)) list=list->next; return list; } } list_head *core_list_reverse(list_head *list) { list_head *next=0, *tmp; while (list) { tmp=list->next; list->next=next; next=list; list=tmp; } return next; } list_head *core_list_mergesort(list_head *list, list_cmp cmp) { list_head *p, *q, *e, *tail; int insize, nmerges, psize, qsize, i; insize = 1; while (1) { p = list; list = 0; tail = 0; nmerges = 0; while (p) { nmerges++; q = p; psize = 0; for (i = 0; i < insize; i++) { psize++; q = q->next; if (!q) break; } qsize = insize; while (psize > 0 || (qsize > 0 && q)) { if (psize == 0) { e = q; q = q->next; qsize--; } else if (qsize == 0 || !q) { e = p; p = p->next; psize--; } else if (cmp(p->info,q->info) <= 0) { e = p; p = p->next; psize--; } else { e = q; q = q->next; qsize--; } if (tail) { tail->next = e; } else { list = e; } tail = e; } p = q; } tail->next = 0; if (nmerges <= 1) return list; insize *= 2; } return list; } #define N 1000000 unsigned char pp[N]; int main (int argc, char *argv[]) { list_head *head = core_list_init(N, (list_head *)pp, 0); core_bench_list(head, 102, 1); core_bench_list(head, 102, -1); return 0; }