19.01.2017 01:46, Ian Romanick пишет:
On 01/07/2017 11:02 AM, Vladislav Egorov wrote:
glcpp's printing is an obvious low hanging fruit:

1. It unnecessarily uses formatted printing to print output of
    preprocessing. To print just one character '+' it first uses
    vsnprintf("%s", "+") to calculate number of characters in the
    formatted string (while it's known statically), then resizes
    buffer increasing it by 1 and then again uses vsnprintf() to
    actually print the string to the resized buffer.

2. It linearly grows output string buffer. Each time preprocessor
    prints anything, it reallocates output buffer increasing it by
    the exact size of appended string. It's bad for two reasons -
    a) unnecesary realloc() calls, b) to print formatted string it
    always have to calculate formatted output size, then resize, and
    only then finally print it - instead of immediately printing to
    the buffer. So it's doing double work.

Create simple exponentially growing string. Use it for preprocessor's
output and error output.
This patch is a bit tough to review.  Part of the difficulty comes
because a bunch of different things happen in the same patch.  I would
suggest splitting it into a couple patches:

Patch 1: Add a new function glcpp_append_char that uses the existing
tracking methods to add a singe character.  Since this avoids all of the
vsnprintf overhead, it should show some benefit on its own.

Patch 2: Same as patch 1, but add glcpp_append_string.  Depending on how
big this actually is, it might be squashed with patch 1.

Patch 3: Same as patches 1 and 2, but add the printf and vprintf
functions.  Again... maybe squash this with the other two.

Patch 4: Switch to the new tracking method.

This keeps the mechanical changes separate from the changes that
actually need more careful review.  It also means that some of it could
land before all of it is ready.

Sure, I'll do it. I don't know, if I'll manage to clearly separate API and the implementation though.


There are a few other comments below.

---
  src/compiler/glsl/glcpp/glcpp-parse.y |  88 +++++++++--------
  src/compiler/glsl/glcpp/glcpp.h       |  60 +++++++++++-
  src/compiler/glsl/glcpp/pp.c          | 176 ++++++++++++++++++++++++++++------
  3 files changed, 251 insertions(+), 73 deletions(-)

diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y 
b/src/compiler/glsl/glcpp/glcpp-parse.y
index 63012bc..d5aa27e 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -209,7 +209,7 @@ line:
  |     SPACE control_line
  |     text_line {
                _glcpp_parser_print_expanded_token_list (parser, $1);
-               ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, 
"\n");
+               glcpp_append_char (parser, &parser->output, '\n');
        }
  |     expanded_line
  ;
@@ -228,20 +228,15 @@ expanded_line:
  |     LINE_EXPANDED integer_constant NEWLINE {
                parser->has_new_line_number = 1;
                parser->new_line_number = $2;
-               ralloc_asprintf_rewrite_tail (&parser->output,
-                                             &parser->output_length,
-                                             "#line %" PRIiMAX "\n",
-                                             $2);
+               glcpp_printf (parser, &parser->output, "#line %" PRIiMAX "\n", 
$2);
        }
  |     LINE_EXPANDED integer_constant integer_constant NEWLINE {
                parser->has_new_line_number = 1;
                parser->new_line_number = $2;
                parser->has_new_source_number = 1;
                parser->new_source_number = $3;
-               ralloc_asprintf_rewrite_tail (&parser->output,
-                                             &parser->output_length,
-                                             "#line %" PRIiMAX " %" PRIiMAX 
"\n",
-                                             $2, $3);
+               glcpp_printf (parser, &parser->output, "#line %" PRIiMAX " %" PRIiMAX 
"\n",
+                             $2, $3);
        }
  ;
@@ -259,7 +254,7 @@ define: control_line:
        control_line_success {
-               ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, 
"\n");
+               glcpp_append_char (parser, &parser->output, '\n');
        }
  |     control_line_error
  |     HASH_TOKEN LINE pp_tokens NEWLINE {
@@ -435,7 +430,8 @@ control_line_success:
                glcpp_parser_resolve_implicit_version(parser);
        }
  |     HASH_TOKEN PRAGMA NEWLINE {
-               ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, 
"#%s", $2);
+               glcpp_append_char (parser, &parser->output, '#');
+               glcpp_append (parser, &parser->output, $2);
        }
  ;
@@ -1113,60 +1109,61 @@ _token_list_equal_ignoring_space(token_list_t *a, token_list_t *b)
  }
static void
-_token_print(char **out, size_t *len, token_t *token)
+_token_print(glcpp_parser_t *parser, struct string_buffer *out,
+             token_t *token)
  {
     if (token->type < 256) {
-      ralloc_asprintf_rewrite_tail (out, len, "%c", token->type);
+      glcpp_append_char (parser, out, token->type);
        return;
     }
switch (token->type) {
     case INTEGER:
-      ralloc_asprintf_rewrite_tail (out, len, "%" PRIiMAX, token->value.ival);
+      glcpp_printf (parser, out, "%" PRIiMAX, token->value.ival);
        break;
     case IDENTIFIER:
     case INTEGER_STRING:
     case OTHER:
-      ralloc_asprintf_rewrite_tail (out, len, "%s", token->value.str);
+      glcpp_append (parser, out, token->value.str);
        break;
     case SPACE:
-      ralloc_asprintf_rewrite_tail (out, len, " ");
+      glcpp_append_char (parser, out, ' ');
        break;
     case LEFT_SHIFT:
-      ralloc_asprintf_rewrite_tail (out, len, "<<");
+      glcpp_append (parser, out, "<<");
        break;
     case RIGHT_SHIFT:
-      ralloc_asprintf_rewrite_tail (out, len, ">>");
+      glcpp_append (parser, out, ">>");
        break;
     case LESS_OR_EQUAL:
-      ralloc_asprintf_rewrite_tail (out, len, "<=");
+      glcpp_append (parser, out, "<=");
        break;
     case GREATER_OR_EQUAL:
-      ralloc_asprintf_rewrite_tail (out, len, ">=");
+      glcpp_append (parser, out, ">=");
        break;
     case EQUAL:
-      ralloc_asprintf_rewrite_tail (out, len, "==");
+      glcpp_append (parser, out, "==");
        break;
     case NOT_EQUAL:
-      ralloc_asprintf_rewrite_tail (out, len, "!=");
+      glcpp_append (parser, out, "!=");
        break;
     case AND:
-      ralloc_asprintf_rewrite_tail (out, len, "&&");
+      glcpp_append (parser, out, "&&");
        break;
     case OR:
-      ralloc_asprintf_rewrite_tail (out, len, "||");
+      glcpp_append (parser, out, "||");
        break;
     case PASTE:
-      ralloc_asprintf_rewrite_tail (out, len, "##");
+      glcpp_append (parser, out, "##");
        break;
     case PLUS_PLUS:
-      ralloc_asprintf_rewrite_tail (out, len, "++");
+      glcpp_append (parser, out, "++");
        break;
     case MINUS_MINUS:
-      ralloc_asprintf_rewrite_tail (out, len, "--");
+      glcpp_append (parser, out, "--");
        break;
     case DEFINED:
-      ralloc_asprintf_rewrite_tail (out, len, "defined");
+      glcpp_append (parser, out, "defined");
        break;
     case PLACEHOLDER:
        /* Nothing to print. */
@@ -1293,11 +1290,11 @@ _token_paste(glcpp_parser_t *parser, token_t *token, 
token_t *other)
FAIL:
     glcpp_error (&token->location, parser, "");
-   ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "Pasting 
\"");
-   _token_print (&parser->info_log, &parser->info_log_length, token);
-   ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" and 
\"");
-   _token_print (&parser->info_log, &parser->info_log_length, other);
-   ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" 
does not give a valid preprocessing token.\n");
+   glcpp_append (parser, &parser->info_log, "Pasting \"");
+   _token_print (parser, &parser->info_log, token);
+   glcpp_append (parser, &parser->info_log, "\" and \"");
+   _token_print (parser, &parser->info_log, other);
+   glcpp_append (parser, &parser->info_log, "\" does not give a valid preprocessing 
token.\n");
return token;
  }
@@ -1311,7 +1308,7 @@ _token_list_print(glcpp_parser_t *parser, token_list_t 
*list)
        return;
for (node = list->head; node; node = node->next)
-      _token_print (&parser->output, &parser->output_length, node->token);
+      _token_print (parser, &parser->output, node->token);
  }
void
@@ -1333,6 +1330,11 @@ add_builtin_define(glcpp_parser_t *parser, const char 
*name, int value)
     _define_object_macro(parser, NULL, name, list);
  }
+/* Initial output buffer size, 4096 minus ralloc() overhead. It was selected
+ * to minimize total amount of allocated memory during shader-db run.
+ */
+#define INITIAL_PP_OUTPUT_BUF_SIZE 4048
+
  glcpp_parser_t *
  glcpp_parser_create(glcpp_extension_iterator extensions, void *state, gl_api 
api)
  {
@@ -1362,10 +1364,12 @@ glcpp_parser_create(glcpp_extension_iterator 
extensions, void *state, gl_api api
     parser->lex_from_list = NULL;
     parser->lex_from_node = NULL;
- parser->output = ralloc_strdup(parser, "");
-   parser->output_length = 0;
-   parser->info_log = ralloc_strdup(parser, "");
-   parser->info_log_length = 0;
+   parser->output.buf = ralloc_size (parser, INITIAL_PP_OUTPUT_BUF_SIZE);
+   parser->output.capacity = INITIAL_PP_OUTPUT_BUF_SIZE;
+   parser->output.length = 0;
+   parser->info_log.buf = ralloc_size (parser, 16);
+   parser->info_log.capacity = 16;
+   parser->info_log.length = 0;
     parser->error = 0;
parser->extensions = extensions;
@@ -2336,10 +2340,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t 
*parser, intmax_t versio
                           version, parser->is_gles);
if (explicitly_set) {
-      ralloc_asprintf_rewrite_tail(&parser->output, &parser->output_length,
-                                   "#version %" PRIiMAX "%s%s", version,
-                                   es_identifier ? " " : "",
-                                   es_identifier ? es_identifier : "");
+      glcpp_printf(parser, &parser->output, "#version %" PRIiMAX "%s%s",
+                   version,
+                   es_identifier ? " " : "",
+                   es_identifier ? es_identifier : "");
     }
  }
diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h
index 232e053..068d83b 100644
--- a/src/compiler/glsl/glcpp/glcpp.h
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -180,6 +180,13 @@ typedef void (*glcpp_extension_iterator)(
                unsigned version,
                bool es);
+struct string_buffer {
+       /* buf is NOT null-terminated, but MUST have enough of space for '\0' */
+       char *buf;
+       size_t length;
+       size_t capacity;
+};
+
  struct glcpp_parser {
        void *linalloc;
        yyscan_t scanner;
@@ -199,10 +206,8 @@ struct glcpp_parser {
        int skipping;
        token_list_t *lex_from_list;
        token_node_t *lex_from_node;
-       char *output;
-       char *info_log;
-       size_t output_length;
-       size_t info_log_length;
+       struct string_buffer output;
+       struct string_buffer info_log;
        int error;
        glcpp_extension_iterator extensions;
        void *state;
@@ -241,6 +246,53 @@ glcpp_preprocess(void *ralloc_ctx, const char **shader, 
char **info_log,
                 glcpp_extension_iterator extensions, void *state,
                 struct gl_context *g_ctx);
+/* Functions for printing into string_buffer.
+ * These functions return false if printing failed.
+ */
+
+bool
+glcpp_strbuffer_ensure_capacity(glcpp_parser_t *parser,
+                               struct string_buffer *str,
+                               size_t len_append);
+
+bool
+glcpp_vprintf(glcpp_parser_t *parser, struct string_buffer *str,
+             const char *fmt, va_list args);
+
+bool
+glcpp_printf(glcpp_parser_t *parser, struct string_buffer *str,
+            const char *fmt, ...);
+
+/* Inlining is necessary to resolve length of literal strings statically */
+static inline bool
+glcpp_append(glcpp_parser_t *parser, struct string_buffer *str, const char *s)
+{
+       size_t len = strlen(s);
+       if (likely(glcpp_strbuffer_ensure_capacity(parser, str, len))) {
I don't think you want to use likely() here or the first unlikely() in
glcpp_append_char.  GCC takes unlikely() to mean a condition that
essentially *never* happens.  My understanding is that the code in the
unlikely branch may be severely penalized... far beyond tweaking branch
prediction flags.  It is worth comparing the code generated (at least
via size) and performance with and without.

AFAIK, compilers don't severely penalize unlikely code, just a bit reorder blocks to make the likely path less "bumpy" and more local (instruction cache performance, etc.). Both Clang and GCC generate almost the same code, but gather likely blocks closer to each other: https://godbolt.org/g/i5M3Dd I doubt there will be any performance difference at all.

+               /* Don't copy null-terminator, str->buf is
+                * not a null-terminating string
+                */
+               memcpy(str->buf + str->length, s, len);
+               str->length += len;
+               return true;
+       } else {
+               return false;
+       }
+}
+
+static inline bool
+glcpp_append_char(glcpp_parser_t *parser, struct string_buffer *str, char ch)
+{
+       assert(str->capacity > 0 && ch != '\0');
+       if (unlikely(str->length + 1 >= str->capacity)) {
+               if (unlikely(!glcpp_strbuffer_ensure_capacity(parser, str, 1)))
+                       return false;
+       }
+       str->buf[str->length] = ch;
+       str->length++;
+       return true;
+}
+
  /* Functions for writing to the info log */
void
diff --git a/src/compiler/glsl/glcpp/pp.c b/src/compiler/glsl/glcpp/pp.c
index b591279..7fbadf9 100644
--- a/src/compiler/glsl/glcpp/pp.c
+++ b/src/compiler/glsl/glcpp/pp.c
@@ -24,7 +24,135 @@
  #include <assert.h>
  #include <string.h>
  #include <ctype.h>
+#include <stdio.h>
+#include <limits.h>
  #include "glcpp.h"
+#include "util/bitscan.h"
+#include "util/u_math.h"
+
+/* Overhead of ralloc allocation in bytes. Should be equal to
+ * sizeof(ralloc_header), but ralloc_header is not a part of
+ * the ralloc public API. Assume it's 48. It's a slight
+ * overestimation, but it's enough for practical purposes.
+ */
+#define RALLOC_OVERHEAD 48
+
+bool
+glcpp_strbuffer_ensure_capacity(glcpp_parser_t *parser,
parser is only used as a memory context, so I'd replace this with 'void
*mem_ctx'.  Same for all the related functions below.  One advantage of
this is that it makes it easier to pull this code out of glcpp for use
elsewhere.  It seems like there are other potential users.


One more reason I wanted parser here -- to stick (potentially) handling OOM-like conditions here, report glcpp_error() and set parser->error flag. Otherwise print() callers should do it themselves, but it will add quite a bit of boilerplate. And the callers don't know when exactly buffer reallocation happened, so they need to check it every call -- it will add a tiny bit of overhead. Very minor, but overhead.

+                               struct string_buffer *str, size_t len_append)
+{
+       /* Existing string length + 1 null-character + new characters */
+       size_t min_capacity = str->length + 1 + len_append;
+       if (unlikely(min_capacity <= str->length))
This is actually a reasonable use of unlikely().  This condition will
only occur of the required capacity overflows.  That really should never
happen.

+               return false;
Blank line here.

+       if (unlikely(min_capacity > str->capacity)) {
+               char *new_buf;
+               size_t new_capacity = str->capacity;
+               size_t overhead_capacity;;
                                          ^ extra ;

I believe we can use C99 mixed code and declarations, so I'd move this
declaration down to it initializer.

Yes, it seems that MSVC2013 already supports it, so no reason not use it.

+               do {
+                       /* Grow factor = 1.5. Most of string implementations
+                        * have grow factor 1.5 (MSVC, Clang, FBVector), with
+                        * exception of GCC that has grow factor of 2.
+                        */
+                       new_capacity = 3 * (new_capacity + 1) / 2;
+                       if (unlikely(new_capacity < str->capacity)) {
+                               new_capacity = min_capacity;
+                               break;
+                       }
+               } while (new_capacity < min_capacity);
+
+               /* Jemalloc optimization. Older jemalloc allocates blocks
+                * large than 4 KiB in 4 KiB chunks, e.g. malloc(5000) will
+                * allocate 8 KiB block and 3 KiB will be wasted. Newer
+                * jemalloc will allocate sizes 5 KiB, 6 KiB (+1 KiB), 7, 8,
+                * 10 (+2), 12, 14, 16, 20 (+4), 24, 28, 32, 40 (+8), and so
+                * on. Size classes will also depends on platform
+                * and compilation keys.
+                *
+                * So optimize for both cases rounding to sizes 4 KiB,
+                * 8 KiB, 12, ..., 32, 40, 48, 56, etc. It will improve
+                * jemalloc and will not pessimize other allocators, because
+                * there is nothing unreasonable in allocating such sizes.
+                *
+                * Check for INT_MAX for the only reason that utility
+                * function align() accepts signed integers.
+                */
+               overhead_capacity = new_capacity + RALLOC_OVERHEAD;
+               if (overhead_capacity > 4096 &&
+                   overhead_capacity > new_capacity &&
+                   overhead_capacity <= INT_MAX) {
+                       overhead_capacity =
+                               align(overhead_capacity,
+                                     1 << (util_last_bit(overhead_capacity) - 
3));
+                       overhead_capacity = align(overhead_capacity, 4096);
+                       new_capacity = overhead_capacity - RALLOC_OVERHEAD;
+               }
+
+               new_buf = reralloc_size(parser, str->buf, new_capacity);
+               if (unlikely(!new_buf))
+                       return false;
+
+               str->buf = new_buf;
+               str->capacity = new_capacity;
+       }
Blank line here.

+       return true;
+}
+
+bool
+glcpp_vprintf(glcpp_parser_t *parser, struct string_buffer *str,
+             const char *fmt, va_list args)
+{
+       int len;
+       va_list args_copy;
+       va_copy(args_copy, args);
+
+#if (defined(_MSC_VER) && _MSC_VER < 1900) || __MINGW32__
+       /* MSVC up to 2013 used non-standard vsnprintf() that returned -1
+        * if the buffer wasn't large enough. Another non-standard function
+        * _vscprintf() should be used to measure length of printf() output.
+        */
+       len = _vscprintf(fmt, args_copy);
+       va_end(args_copy);
Blank line here.

+       if (len < 0) {
+               return false;
Blank line here.

+       if (!glcpp_strbuffer_ensure_capacity(parser, str, len))
+               return false;
Blank line here.

+       vsprintf(str->buf + str->length, fmt, args);
+#else
+       len = vsnprintf(str->buf + str->length, str->capacity - str->length,
+                       fmt, args_copy);
+       va_end(args_copy);
+
+       /* Fail if an error happened in vsnprintf()
+        * or if measured len overflows size_t
+        */
+       if (unlikely(len < 0 || str->length + len + 1 < str->length))
+               return false;
+
+       /* Formatted string is too large for the buffer.
+        * Resize the buffer and print again.
+        */
+       if (str->length + len + 1 > str->capacity) {
+               if (!glcpp_strbuffer_ensure_capacity(parser, str, len))
+                       return false;
+               vsprintf(str->buf + str->length, fmt, args);
+       }
+#endif
+       str->length += len;
+       return true;
+}
+
+bool
+glcpp_printf(glcpp_parser_t *parser, struct string_buffer *str,
+            const char *fmt, ...)
+{
+       bool res;
+       va_list args;
+       va_start(args, fmt);
+       res = glcpp_vprintf(parser, str, fmt, args);
+       va_end(args);
+       return res;
+}
void
  glcpp_error (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...)
@@ -32,20 +160,15 @@ glcpp_error (YYLTYPE *locp, glcpp_parser_t *parser, const 
char *fmt, ...)
        va_list ap;
parser->error = 1;
-       ralloc_asprintf_rewrite_tail(&parser->info_log,
-                                    &parser->info_log_length,
-                                    "%u:%u(%u): "
-                                    "preprocessor error: ",
-                                    locp->source,
-                                    locp->first_line,
-                                    locp->first_column);
+       glcpp_printf(parser, &parser->info_log,
+                    "%u:%u(%u): preprocessor error: ",
+                    locp->source,
+                    locp->first_line,
+                    locp->first_column);
        va_start(ap, fmt);
-       ralloc_vasprintf_rewrite_tail(&parser->info_log,
-                                     &parser->info_log_length,
-                                     fmt, ap);
+       glcpp_vprintf(parser, &parser->info_log, fmt, ap);
        va_end(ap);
-       ralloc_asprintf_rewrite_tail(&parser->info_log,
-                                    &parser->info_log_length, "\n");
+       glcpp_append(parser, &parser->info_log, "\n");
  }
void
@@ -53,20 +176,15 @@ glcpp_warning (YYLTYPE *locp, glcpp_parser_t *parser, 
const char *fmt, ...)
  {
        va_list ap;
- ralloc_asprintf_rewrite_tail(&parser->info_log,
-                                    &parser->info_log_length,
-                                    "%u:%u(%u): "
-                                    "preprocessor warning: ",
-                                    locp->source,
-                                    locp->first_line,
-                                    locp->first_column);
+       glcpp_printf(parser, &parser->info_log,
+                    "%u:%u(%u): preprocessor warning: ",
+                    locp->source,
+                    locp->first_line,
+                    locp->first_column);
        va_start(ap, fmt);
-       ralloc_vasprintf_rewrite_tail(&parser->info_log,
-                                     &parser->info_log_length,
-                                     fmt, ap);
+       glcpp_vprintf(parser, &parser->info_log, fmt, ap);
        va_end(ap);
-       ralloc_asprintf_rewrite_tail(&parser->info_log,
-                                    &parser->info_log_length, "\n");
+       glcpp_append(parser, &parser->info_log, "\n");
  }
/* Given str, (that's expected to start with a newline terminator of some
@@ -232,10 +350,14 @@ glcpp_preprocess(void *ralloc_ctx, const char **shader, 
char **info_log,
glcpp_parser_resolve_implicit_version(parser); - ralloc_strcat(info_log, parser->info_log);
+       parser->info_log.buf[parser->info_log.length] = '\0';
+       ralloc_strcat(info_log, parser->info_log.buf);
- ralloc_steal(ralloc_ctx, parser->output);
-       *shader = parser->output;
+       parser->output.buf[parser->output.length] = '\0';
+       parser->output.buf = reralloc_size(parser, parser->output.buf,
+                                          parser->output.length + 1);
+       ralloc_steal(ralloc_ctx, parser->output.buf);
+       *shader = parser->output.buf;
errors = parser->error;
        glcpp_parser_destroy (parser);


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to