Dear gcc contributors,

Recently I came across the list of "ideas for speeding up GCC"
(http://gcc.gnu.org/wiki/Speedup_areas). Among others, there was
suggested to replace identifier hash table with other data structure.

Please find attached a patch, that switches the resolution strategy of
a hash table used in the symbol table of identifiers from open
addressing to separate chaining with linked list. I would be very
grateful for your comments, feedback and ideas about further
enhancements to gcc, in which I could take a part. I am pursuing
master of science and looking for thesis theme, that is compiler
related.

Below are results of testing and the patch.

During testing of the linux kernel (3.8.8) compilation time, the
acquired results were the following: increasing by 0.17% for the
version 4.8.0, increasing by 1.12% for the version 4.8.1, decreasing
by 0.598% for trunk (this are average values).


Tested x86_64-unknown-linux-gnu, applying to 4.8.0, 4.8.1 and trunk.


diff --git a/gcc/stringpool.c b/gcc/stringpool.c

index f4d0dae..cc696f7 100644

--- a/gcc/stringpool.c

+++ b/gcc/stringpool.c

@@ -241,12 +241,8 @@ gt_pch_nx (unsigned char *x, gt_pointer_operator
op, void *cookie)

/* SPD is saved in the PCH file and holds the information needed

to restore the string pool. */

-struct GTY(()) string_pool_data {

- ht_identifier_ptr *

- GTY((length ("%h.nslots"),

- nested_ptr (union tree_node, "%h ? GCC_IDENT_TO_HT_IDENT (%h) : NULL",

- "%h ? HT_IDENT_TO_GCC_IDENT (%h) : NULL")))

- entries;

+struct GTY((user)) string_pool_data {

+ ht_identifier_ptr * entries;

unsigned int nslots;

unsigned int nelements;

};

@@ -284,4 +280,113 @@ gt_pch_restore_stringpool (void)

spd = NULL;

}

+/* User-provided GC marking routines for the struct ht_identifier.
They are using

+ only in GC marking routines for the struct string_pool_data. */

+

+extern void ht_identifier_ggc_mx (void *x_p);

+extern void ht_identifier_pch_nx (void *x_p);

+extern void ht_identifier_pch_nx (void *this_obj, void *x_p,
gt_pointer_operator op, void *cookie);

+

+void

+ht_identifier_ggc_mx (void *x_p)

+{

+ struct ht_identifier * x = (struct ht_identifier *)x_p;

+ struct ht_identifier * xlimit = x;

+ while (ggc_test_and_set_mark (xlimit))

+ xlimit = ((*xlimit).next);

+ while (x != xlimit)

+ {

+ union tree_node * const x0 = ((*x).next) ? HT_IDENT_TO_GCC_IDENT
(((*x).next)) : NULL;

+ gt_ggc_m_9tree_node (x0);

+ x = ((*x).next);

+ }

+}

+

+void

+ht_identifier_pch_nx (void *x_p)

+{

+ struct ht_identifier * x = (struct ht_identifier *)x_p;

+ struct ht_identifier * xlimit = x;

+ while (gt_pch_note_object (xlimit, xlimit, ht_identifier_pch_nx))

+ xlimit = ((*xlimit).next);

+ while (x != xlimit)

+ {

+ union tree_node * const x0 = ((*x).next) ? HT_IDENT_TO_GCC_IDENT
(((*x).next)) : NULL;

+ gt_pch_n_9tree_node (x0);

+ x = ((*x).next);

+ }

+}

+

+void

+ht_identifier_pch_nx (void *this_obj, void *x_p, gt_pointer_operator
op, void *cookie)

+{

+ struct ht_identifier * x = (struct ht_identifier *)x_p;

+ op (&((*x).str), cookie);

+ union tree_node * x0 = ((*x).next) ? HT_IDENT_TO_GCC_IDENT
(((*x).next)) : NULL;

+ op (&(x0), cookie);

+ (*x).next = (x0) ? (GCC_IDENT_TO_HT_IDENT ((x0))) : NULL;

+}

+

+/* A pointer walker for entries of the struct string_pool_data. */

+

+void

+string_pool_data_pch_entries_walker (void *this_obj, void *x_p,
gt_pointer_operator op, void *cookie)

+{

+ struct string_pool_data * x ATTRIBUTE_UNUSED = (struct string_pool_data *)x_p;

+ size_t l0 = (size_t)(((*x)).nslots);

+ if ((*x).entries != NULL)

+ {

+ size_t i0;

+ for (i0 = 0; i0 != (size_t)(l0); i0++)

+ {

+ union tree_node * x0 = ((*x).entries[i0]) ? HT_IDENT_TO_GCC_IDENT
(((*x).entries[i0])) : NULL;

+ op (&(x0), cookie);

+ (*x).entries[i0] = (x0) ? (GCC_IDENT_TO_HT_IDENT ((x0))) : NULL;

+ }

+ }

+}

+

+/* User-provided GC marking routines for the struct string_pool_data. */

+

+void

+gt_ggc_mx (string_pool_data *x)

+{

+ size_t l0 = (size_t)(((*x)).nslots);

+ if ((*x).entries != NULL)

+ {

+ size_t i0;

+ for (i0 = 0; i0 != (size_t)(l0); i0++)

+ {

+ ht_identifier_ggc_mx ((*x).entries[i0]);

+ union tree_node * const x0 = ((*x).entries[i0]) ?
HT_IDENT_TO_GCC_IDENT (((*x).entries[i0])) : NULL;

+ gt_ggc_m_9tree_node (x0);

+ }

+ ggc_mark ((*x).entries);

+ }

+}

+

+void

+gt_pch_nx (string_pool_data *x)

+{

+ size_t l0 = (size_t)(((*x)).nslots);

+ if ((*x).entries != NULL)

+ {

+ size_t i0;

+ for (i0 = 0; i0 != (size_t)(l0); i0++)

+ {

+ union tree_node * const x1 = ((*x).entries[i0]) ?
HT_IDENT_TO_GCC_IDENT (((*x).entries[i0])) : NULL;

+ gt_pch_n_9tree_node (x1);

+ ht_identifier_pch_nx ((*x).entries[i0]);

+ }

+ gt_pch_note_object ((*x).entries, x, string_pool_data_pch_entries_walker);

+ }

+}

+

+void

+gt_pch_nx (string_pool_data *x, gt_pointer_operator op, void *cookie)

+{

+ if ((*x).entries != NULL)

+ op (&((*x).entries), cookie);

+}

+

#include "gt-stringpool.h"

diff --git a/libcpp/include/symtab.h b/libcpp/include/symtab.h

index a4ea719..88a0bd3 100644

--- a/libcpp/include/symtab.h

+++ b/libcpp/include/symtab.h

@@ -28,10 +28,14 @@ along with this program; see the file COPYING3. If not see

deeply within another object. */

typedef struct ht_identifier ht_identifier;

typedef struct ht_identifier *ht_identifier_ptr;

-struct GTY(()) ht_identifier {

+struct GTY((chain_next ("%h.next"))) ht_identifier {

const unsigned char *str;

unsigned int len;

unsigned int hash_value;

+ struct ht_identifier

+ GTY((nested_ptr (union tree_node, "%h ? (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",

+ "%h ? HT_IDENT_TO_GCC_IDENT (%h) : NULL")))

+ *next;

};

#define HT_LEN(NODE) ((NODE)->len)

diff --git a/libcpp/symtab.c b/libcpp/symtab.c

index dee8626..a4e05a0 100644

--- a/libcpp/symtab.c

+++ b/libcpp/symtab.c

@@ -30,7 +30,6 @@ along with this program; see the file COPYING3. If not see

existing entry with a potential new one. */

static unsigned int calc_hash (const unsigned char *, size_t);

-static void ht_expand (cpp_hash_table *);

static double approx_sqrt (double);

/* A deleted entry. */

@@ -66,13 +65,14 @@ ht_create (unsigned int order)

(void (*) (void *)) free);

obstack_alignment_mask (&table->stack) = 0;

-

table->entries = XCNEWVEC (hashnode, nslots);

table->entries_owned = true;

table->nslots = nslots;

+ table->nelements = 0;

return table;

}

+

/* Frees all memory associated with a hash table. */

void

@@ -102,140 +102,58 @@ ht_lookup_with_hash (cpp_hash_table *table,
const unsigned char *str,

size_t len, unsigned int hash,

enum ht_lookup_option insert)

{

- unsigned int hash2;

unsigned int index;

- unsigned int deleted_index = table->nslots;

size_t sizemask;

hashnode node;

-

sizemask = table->nslots - 1;

index = hash & sizemask;

table->searches++;

-

node = table->entries[index];

-

- if (node != NULL)

- {

- if (node == DELETED)

- deleted_index = index;

- else if (node->hash_value == hash

- && HT_LEN (node) == (unsigned int) len

- && !memcmp (HT_STR (node), str, len))

- return node;

-

- /* hash2 must be odd, so we're guaranteed to visit every possible

- location in the table during rehashing. */

- hash2 = ((hash * 17) & sizemask) | 1;

-

- for (;;)

- {

- table->collisions++;

- index = (index + hash2) & sizemask;

- node = table->entries[index];

- if (node == NULL)

- break;

-

- if (node == DELETED)

- {

- if (deleted_index != table->nslots)

- deleted_index = index;

- }

- else if (node->hash_value == hash

- && HT_LEN (node) == (unsigned int) len

- && !memcmp (HT_STR (node), str, len))

- return node;

- }

- }

-

+ while(node != NULL)

+ {

+ if (HT_LEN (node) == (unsigned int) len

+ && !memcmp (HT_STR (node), str, len))

+ return node;

+ node = node->next;

+ }

if (insert == HT_NO_INSERT)

return NULL;

-

- /* We prefer to overwrite the first deleted slot we saw. */

- if (deleted_index != table->nslots)

- index = deleted_index;

-

node = (*table->alloc_node) (table);

- table->entries[index] = node;

-

HT_LEN (node) = (unsigned int) len;

node->hash_value = hash;

-

+ node->next = table->entries[index];

+ table->entries[index] = node;

+ table->nelements++;

if (table->alloc_subobject)

- {

- char *chars = (char *) table->alloc_subobject (len + 1);

- memcpy (chars, str, len);

- chars[len] = '\0';

- HT_STR (node) = (const unsigned char *) chars;

- }

+ {

+ char *chars = (char *) table->alloc_subobject (len + 1);

+ memcpy (chars, str, len);

+ chars[len] = '\0';

+ HT_STR (node) = (const unsigned char *) chars;

+ }

else

HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,

str, len);

-

- if (++table->nelements * 4 >= table->nslots * 3)

- /* Must expand the string table. */

- ht_expand (table);

-

return node;

}

-/* Double the size of a hash table, re-hashing existing entries. */

-

-static void

-ht_expand (cpp_hash_table *table)

-{

- hashnode *nentries, *p, *limit;

- unsigned int size, sizemask;

-

- size = table->nslots * 2;

- nentries = XCNEWVEC (hashnode, size);

- sizemask = size - 1;

-

- p = table->entries;

- limit = p + table->nslots;

- do

- if (*p && *p != DELETED)

- {

- unsigned int index, hash, hash2;

-

- hash = (*p)->hash_value;

- index = hash & sizemask;

-

- if (nentries[index])

- {

- hash2 = ((hash * 17) & sizemask) | 1;

- do

- {

- index = (index + hash2) & sizemask;

- }

- while (nentries[index]);

- }

- nentries[index] = *p;

- }

- while (++p < limit);

-

- if (table->entries_owned)

- free (table->entries);

- table->entries_owned = true;

- table->entries = nentries;

- table->nslots = size;

-}

/* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,

the node, and V. */

void

ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)

{

- hashnode *p, *limit;

-

- p = table->entries;

- limit = p + table->nslots;

- do

- if (*p && *p != DELETED)

- {

- if ((*cb) (table->pfile, *p, v) == 0)

- break;

- }

- while (++p < limit);

+ hashnode tmp;

+ for(unsigned int i = 0; i < table->nslots; i++)

+ {

+ tmp = table->entries[i];

+ while(tmp != NULL)

+ {

+ if ((*cb) (table->pfile, tmp, v) == 0)

+ return;

+ tmp = tmp->next;

+ }

+ }

}

/* Like ht_forall, but a nonzero return from the callback means that

@@ -243,17 +161,29 @@ ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)

void

ht_purge (cpp_hash_table *table, ht_cb cb, const void *v)

{

- hashnode *p, *limit;

-

- p = table->entries;

- limit = p + table->nslots;

- do

- if (*p && *p != DELETED)

+ hashnode tmp;

+ hashnode old;

+ for(unsigned int i = 0; i < table->nslots; i++)

+ {

+ old = table->entries[i];

+ while(old != NULL)

+ {

+ tmp = old->next;

+ if (tmp != NULL && (*cb) (table->pfile, tmp, v))

{

- if ((*cb) (table->pfile, *p, v))

- *p = DELETED;

+ old->next = tmp->next;

+ tmp = old->next;

+ table->nelements--;

}

- while (++p < limit);

+ else

+ old = tmp;

+ }

+ old = table->entries[i];

+ if(old != NULL && (*cb) (table->pfile, old, v))

+ {

+ table->entries[i] = old->next;

+ }

+ }

}

/* Restore the hash table. */

@@ -278,7 +208,6 @@ ht_dump_statistics (cpp_hash_table *table)

size_t nelts, nids, overhead, headers;

size_t total_bytes, longest, deleted = 0;

double sum_of_squares, exp_len, exp_len2, exp2_len;

- hashnode *p, *limit;

#define SCALE(x) ((unsigned long) ((x) < 1024*10 \

? (x) \

@@ -288,22 +217,21 @@ ht_dump_statistics (cpp_hash_table *table)

#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))

total_bytes = longest = sum_of_squares = nids = 0;

- p = table->entries;

- limit = p + table->nslots;

- do

- if (*p == DELETED)

- ++deleted;

- else if (*p)

- {

- size_t n = HT_LEN (*p);

-

- total_bytes += n;

- sum_of_squares += (double) n * n;

- if (n > longest)

- longest = n;

- nids++;

- }

- while (++p < limit);

+ hashnode tmp;

+ for(unsigned int i = 0; i < table->nslots; i++)

+ {

+ tmp = table->entries[i];

+ while(tmp != NULL)

+ {

+ size_t n = HT_LEN (tmp);

+ total_bytes += n;

+ sum_of_squares += (double) n * n;

+ if (n > longest)

+ longest = n;

+ nids++;

+ tmp = tmp->next;

+ }

+ }

nelts = table->nelements;

overhead = obstack_memory_used (&table->stack) - total_bytes;

@@ -360,3 +288,4 @@ approx_sqrt (double x)

while (d > .0001);

return s;

}

+

Reply via email to