From: Vladislav Egorov <vegorov...@gmail.com> Absolute majority of identifiers in shaders are not macro references. Many shaders don't use preprocessing macros at all. Almost all queries to parser->defines hash-table will be unsuccessful. Note that all predefined macros start either with "GL_" or with "__". Moreover, even user-defined macros are usually use upper case, while ordinary identifiers use lower case. It's easy to detect macros just looking on the first two characters of their name. So it makes sense to make a quick identifier lookup in a Bloom filter before making relatively expensive hash-table search call.
Many schemes of Bloom are possible, but it seems that two mini 64 bit Bloom tables -- one for the 1st identifier's char, one for the second identifier's char -- with one trivial hash-function (6 lowest bits of the char) work very well. Considering that Latin-1 alphabetic chars and _ occupy positions from 65 to 122, every character will have its bit position in 64-bit word, distinguishing lower and upper case. Some games do things like "#define float4 float", this scheme would not help here, it barely improves heavy users of preprocessor like Talos Principle, but in general it works pretty well. In my shader-db it eliminates 90%+ hash table queries during preprocessing stage. One 64-bit Bloom table working on only 1st or only 2nd char will eliminate ~70-80% queries. For simplicity use the same scheme for both 32-bit and 64-bit archs. 32-bit archs still have pretty efficients ways to do 64-bit shifts. --- src/compiler/glsl/glcpp/glcpp-parse.y | 24 ++++++++++++++++++------ src/compiler/glsl/glcpp/glcpp.h | 10 ++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index debf12bef8..19a6df2015 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -1359,6 +1359,11 @@ glcpp_parser_create(const struct gl_extensions *extension_list, glcpp_lex_init_extra (parser, &parser->scanner); parser->defines = _mesa_hash_table_create(NULL, _mesa_key_hash_string, _mesa_key_string_equal); + parser->defines_bloom0 = 0; + parser->defines_bloom1 = 0; + GLCPP_DEFINES_BLOOM_PUT(parser, "__LINE__"); + GLCPP_DEFINES_BLOOM_PUT(parser, "__FILE__"); + parser->linalloc = linear_alloc_parent(parser, 0); parser->active = NULL; parser->lexing_directive = 0; @@ -1845,6 +1850,9 @@ _glcpp_parser_expand_node(glcpp_parser_t *parser, token_node_t *node, *last = node; identifier = token->value.str; + if (!GLCPP_DEFINES_BLOOM_GET(parser, identifier)) + return NULL; + /* Special handling for __LINE__ and __FILE__, (not through * the hash table). */ if (*identifier == '_') { @@ -2131,6 +2139,7 @@ _define_object_macro(glcpp_parser_t *parser, YYLTYPE *loc, glcpp_error (loc, parser, "Redefinition of macro %s\n", identifier); } + GLCPP_DEFINES_BLOOM_PUT(parser, identifier); _mesa_hash_table_insert (parser->defines, identifier, macro); } @@ -2166,6 +2175,7 @@ _define_function_macro(glcpp_parser_t *parser, YYLTYPE *loc, glcpp_error (loc, parser, "Redefinition of macro %s\n", identifier); } + GLCPP_DEFINES_BLOOM_PUT(parser, identifier); _mesa_hash_table_insert(parser->defines, identifier, macro); } @@ -2212,12 +2222,14 @@ glcpp_parser_lex(YYSTYPE *yylval, YYLTYPE *yylloc, glcpp_parser_t *parser) ret == ENDIF || ret == HASH_TOKEN) { parser->in_control_line = 1; } else if (ret == IDENTIFIER) { - struct hash_entry *entry = _mesa_hash_table_search(parser->defines, - yylval->str); - macro_t *macro = entry ? entry->data : NULL; - if (macro && macro->is_function) { - parser->newline_as_space = 1; - parser->paren_count = 0; + if (GLCPP_DEFINES_BLOOM_GET(parser, yylval->str)) { + struct hash_entry *entry = _mesa_hash_table_search(parser->defines, + yylval->str); + macro_t *macro = entry ? entry->data : NULL; + if (macro && macro->is_function) { + parser->newline_as_space = 1; + parser->paren_count = 0; + } } } diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h index 9d997cd924..b2f6f7a2a3 100644 --- a/src/compiler/glsl/glcpp/glcpp.h +++ b/src/compiler/glsl/glcpp/glcpp.h @@ -37,6 +37,14 @@ #define yyscan_t void* +#define GLCPP_DEFINES_BLOOM_GET(parser, identifier) \ + (((((parser)->defines_bloom0) >> ((identifier)[0] & 63)) & 1) & \ + ((((parser)->defines_bloom1) >> ((identifier)[1] & 63)) & 1)) + +#define GLCPP_DEFINES_BLOOM_PUT(parser, identifier) \ + (parser)->defines_bloom0 |= (uint64_t)1 << ((identifier)[0] & 63); \ + (parser)->defines_bloom1 |= (uint64_t)1 << ((identifier)[1] & 63); + /* Some data types used for parser values. */ typedef struct expression_value { @@ -186,6 +194,8 @@ struct glcpp_parser { void *linalloc; yyscan_t scanner; struct hash_table *defines; + uint64_t defines_bloom0; + uint64_t defines_bloom1; active_list_t *active; int lexing_directive; int lexing_version_directive; -- 2.13.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev