I add below a ~100-line code. When compiling the code with g++ -march=pentium4 -O3 -g -S allocator.cc (g++ is version 3.4.2) I get this assembly code:
.loc 1 50 0 jne .L2 .loc 1 93 0 movl 4(%ecx), %ebx // ebx now holds page->prev movl (%ecx), %eax // eax now holds page->next .loc 1 97 0 movl _ZN8PagePool9free_listE, %edx // edx now holds free_list .loc 1 93 0 movl %eax, (%ebx) // page->prev->next = page->next .loc 1 97 0 movl %edx, (%ecx) // page->next = free_list->next .loc 1 98 0 movl %ecx, _ZN8PagePool9free_listE // free_list->next = page .loc 1 94 0 movl (%ecx), %esi // esi now holds page->next // (BUG: This is actually free_list->next) movl %ebx, 4(%esi) // page->next->prev = page->prev // (BUG: this is actually // page->next->prev = free_list->next) The optimization that moved the assembly of line 97 to be before line 94 causes incorrect value into 4(%esi) (= page->next->prev) The C++ code: uncommneting the printf "fixes" this problem since apparently it prevent this optimization from taking place +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #include <stddef.h> #include <stdlib.h> #include <stdio.h> const size_t granularity_bits = 2; const size_t granularity = 1 << granularity_bits; const size_t page_size_bits = 12; const size_t page_size = 1 << page_size_bits; const size_t metapage_size_bits = 8; const size_t metapage_size = 1 << metapage_size_bits; size_t handled_obj_size(size_t page_free_size) { return page_free_size; } template<class T> inline T align_down(T val, size_t alignment) { return val & ~(alignment - 1); } // A node of the free list struct Header { Header* next; Header() : next(0) {} bool is_empty() const { return next == 0; } void enqueue(Header* obj) { obj->next = next; next = obj; } }; struct Page { // The list of pages of the same object-size Page* prev; Page* next; // sizes size_t alloc_size; size_t alloc_count; // List of free slabs in this page Header free_list; // pointer to the first unallocated slab void* unallocated; Page() : prev(0), next(0), free_list() { } bool is_empty() const { return (next == this); } bool is_page_full() const { return free_list.is_empty() && (unallocated == 0); } bool is_page_empty() const { return alloc_count == 0; } void free(void* obj) { --alloc_count; free_list.enqueue((Header*) obj); } bool is_big_alloc() { return alloc_size == 0; } void* big_free() { return unallocated; } }; struct PagePool { static Header free_list; static void free(Page* page) { free_list.enqueue((Header*) page); } }; Header PagePool::free_list; const size_t num_sizes = page_size / granularity; const size_t page_free_space = page_size - sizeof(Page); Page page_lists[num_sizes]; void operator delete(void* ptr) { if(ptr == 0) { return; } Page* page = (Page*) align_down((ptrdiff_t) ptr, page_size); if(page->is_big_alloc()) { void* place = page->big_free(); free(place); return; } if(page->is_page_full()) { } else { page->free(ptr); if(page->is_page_empty()) { page->next->prev = page->prev; page->prev->next = page->next; // fprintf(stderr, "ddd after 3 pnp: %p, ppn: %p \n", page->next->prev, page->prev->next); ((Header*)page)->next = PagePool::free_list.next; PagePool::free_list.next = (Header*)page; } } } +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -- Summary: Bad registers optimization Product: gcc Version: 3.4.2 Status: UNCONFIRMED Severity: critical Priority: P2 Component: c++ AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: hayim at dv-networks dot com CC: gcc-bugs at gcc dot gnu dot org http://gcc.gnu.org/bugzilla/show_bug.cgi?id=22190