ok with the following changes (and after testing). + * sizeof (char*)); for (unsigned j = 0; j < total_num - num_array[0]; j++) - module->string_array[j] = xstrdup (gcov_read_string ()); + string_array[j] = xstrdup (gcov_read_string ()); + + k = 0; + for (unsigned j = 1; j < (unsigned)num_lipo_cc1_string_kind; j++)
missing space. + { + if (num_array[j] == 0) + continue; + j_end += num_array[j]; + len += strlen (DELIMITER) + 1; /* [<FLAG> */ + for (; k < j_end; k++) + len += strlen (string_array[k]) + 1; /* 1 for delimiter of ']' */ + } + saved_cc1_strings = (char *) xmalloc (len + 1); + saved_cc1_strings[0] = 0; + + j_end = 0; + k = 0; + for (unsigned j = 1; j < (unsigned)num_lipo_cc1_string_kind; j++) missing space. + +struct lipo_parsed_cc1_string * +lipo_parse_saved_cc1_string (const char *src, char *str, bool parse_cl_args_only) +{ + char *substr_begin; + unsigned size = sizeof (struct lipo_parsed_cc1_string); + + struct lipo_parsed_cc1_string *ret = (struct lipo_parsed_cc1_string *) + xmalloc (size); + memset (ret, 0, size); use XCNEW instead to allocate and initialize. + ret->source_filename = src; + + substr_begin = find_substr (str); + if (substr_begin == str + strlen(str)) if (*substr_begin == '\0') is faster. + k = k_cpp_includes; + break; + case COMMAND_ARG_FLAG: + k = k_lipo_cl_args; + break; + default: + gcc_unreachable (); } - for (pdir = system_paths; pdir; pdir = pdir->next) - num_system_paths++; + substr_end = find_substr (substr_begin + 2); 2 --> strlen(DELIMITER) + 1 +/* The following defines are for the saved_cc1_strings encoding. */ + +#define DELIMITER "[" +#define DELIMITER2 "]" Suggest changing DELIMITER to LIPO_STR_DELIM On Tue, Oct 6, 2015 at 11:40 AM, Rong Xu <x...@google.com> wrote: > Here is the patch set 2 that integrates David's comments. Note that > this uses the combined strlen (i.e. encoding compressed and > uncompressed strlen into one gcov_unsigned_t). > > Testing is ongoing. > > -Rong > > On Tue, Oct 6, 2015 at 11:30 AM, Rong Xu <x...@google.com> wrote: >> It's 1:3 to 1:4 in the programs I tested. But it really depends on how >> the options are used. I think your idea of using combined strlen works >> better. >> I just make the code a little clumsy but it does not cause any >> performance issue. >> >> On Tue, Oct 6, 2015 at 10:21 AM, Xinliang David Li <davi...@google.com> >> wrote: >>> On Tue, Oct 6, 2015 at 9:26 AM, Rong Xu <x...@google.com> wrote: >>>> On Mon, Oct 5, 2015 at 5:33 PM, Xinliang David Li <davi...@google.com> >>>> wrote: >>>>> unsigned ggc_memory = gcov_read_unsigned (); >>>>> + unsigned marker = 0, len = 0, k; >>>>> + char **string_array, *saved_cc1_strings; >>>>> + >>>>> for (unsigned j = 0; j < 7; j++) >>>>> >>>>> >>>>> Do not use hard coded number. Use the enum defined in coverage.c. >>>> >>>> OK. >>>> >>>>> >>>>> >>>>> + string_array[j] = xstrdup (gcov_read_string ()); >>>>> + >>>>> + k = 0; >>>>> + for (unsigned j = 1; j < 7; j++) >>>>> >>>>> Do not use hard coded number. >>>> >>>> OK. >>>> >>>>> >>>>> >>>>> + { >>>>> + if (num_array[j] == 0) >>>>> + continue; >>>>> + marker += num_array[j]; >>>>> >>>>> It is better to read if the name of variable 'marker' is changed to >>>>> 'j_end' or something similar >>>>> >>>>> For all the substrings of 'j' kind, there should be just one marker, >>>>> right? It looks like here you introduce one marker per string, not one >>>>> marker per string kind. >>>> >>>> I don't understand what you meant here. "marker" is fixed for each j >>>> substring (one option kind) -- it the end index of the sub-string >>>> array. k-loop is for each string. >>>> >>> >>> That was a wrong comment from me. Discard it. >>> >>>>> >>>>> + len += 3; /* [[<FLAG> */ >>>>> >>>>> Same here for hard coded value. >>>>> >>>>> + for (; k < marker; k++) >>>>> + len += strlen (string_array[k]) + 1; /* 1 for delimter of >>>>> ']' */ >>>>> >>>>> Why do we need one ']' per string? >>>> >>>> This is because the options strings can contain space ' '. I cannot >>>> use space as the delimiter, neither is \0 as it is the end of the >>>> string of the encoded string. >>> >>> Ok -- this allows you to avoid string copy during parsing. >>>> >>>>> >>>>> >>>>> + } >>>>> + saved_cc1_strings = (char *) xmalloc (len + 1); >>>>> + saved_cc1_strings[0] = 0; >>>>> + >>>>> + marker = 0; >>>>> + k = 0; >>>>> + for (unsigned j = 1; j < 7; j++) >>>>> >>>>> Same here for 7. >>>> >>>> will fix in the new patch. >>>> >>>>> >>>>> + { >>>>> + static const char lipo_string_flags[6] = {'Q', 'B', 'S', >>>>> 'D','I', 'C'}; >>>>> + if (num_array[j] == 0) >>>>> + continue; >>>>> + marker += num_array[j]; >>>>> >>>>> Suggest changing marker to j_end >>>> OK. >>>> >>>>> >>>>> + sprintf (saved_cc1_strings, "%s[[%c", saved_cc1_strings, >>>>> + lipo_string_flags[j - 1]); >>>>> + for (; k < marker; k++) >>>>> + { >>>>> + sprintf (saved_cc1_strings, "%s%s]", saved_cc1_strings, >>>>> + string_array[k]); >>>>> >>>>> +#define DELIMTER "[[" >>>>> >>>>> Why double '[' ? >>>> I will change to single '['. >>>> >>>>> >>>>> +#define DELIMTER2 "]" >>>>> +#define QUOTE_PATH_FLAG 'Q' >>>>> +#define BRACKET_PATH_FLAG 'B' >>>>> +#define SYSTEM_PATH_FLAG 'S' >>>>> +#define D_U_OPTION_FLAG 'D' >>>>> +#define INCLUDE_OPTION_FLAG 'I' >>>>> +#define COMMAND_ARG_FLAG 'C' >>>>> + >>>>> +enum lipo_cc1_string_kind { >>>>> + k_quote_paths = 0, >>>>> + k_bracket_paths, >>>>> + k_system_paths, >>>>> + k_cpp_defines, >>>>> + k_cpp_includes, >>>>> + k_lipo_cl_args, >>>>> + num_lipo_cc1_string_kind >>>>> +}; >>>>> + >>>>> +struct lipo_parsed_cc1_string { >>>>> + const char* source_filename; >>>>> + unsigned num[num_lipo_cc1_string_kind]; >>>>> + char **strings[num_lipo_cc1_string_kind]; >>>>> +}; >>>>> + >>>>> +struct lipo_parsed_cc1_string * >>>>> +lipo_parse_saved_cc1_string (const char *src, char *str, >>>>> + bool parse_cl_args_only); >>>>> +void free_parsed_string (struct lipo_parsed_cc1_string *string); >>>>> + >>>>> >>>>> Declare above in a header file. >>>> >>>> OK. >>>> >>>>> >>>>> >>>>> /* Returns true if the command-line arguments stored in the given >>>>> module-infos >>>>> are incompatible. */ >>>>> bool >>>>> -incompatible_cl_args (struct gcov_module_info* mod_info1, >>>>> - struct gcov_module_info* mod_info2) >>>>> +incompatible_cl_args (struct lipo_parsed_cc1_string* mod_info1, >>>>> + struct lipo_parsed_cc1_string* mod_info2) >>>>> >>>>> Fix formating. >>>> OK. >>>>> >>>>> { >>>>> { >>>>> @@ -1647,7 +1679,7 @@ build_var (tree fn_decl, tree type, int counter) >>>>> /* Creates the gcov_fn_info RECORD_TYPE. */ >>>>> >>>>> NULL_TREE, get_gcov_unsigned_t ()); >>>>> DECL_CHAIN (field) = fields; >>>>> fields = field; >>>>> >>>>> - /* Num bracket paths */ >>>>> + /* cc1_uncompressed_strlen field */ >>>>> field = build_decl (BUILTINS_LOCATION, FIELD_DECL, >>>>> NULL_TREE, get_gcov_unsigned_t ()); >>>>> DECL_CHAIN (field) = fields; >>>>> fields = field; >>>>> >>>>> >>>>> Why do we need to store uncompressed string length? If there is need >>>>> to do that, I suggest combine uncompressed length and compressed >>>>> length into one 32bit integer. (16/16, or 17/15 split) >>>> >>>> In theory, I don't need the uncompressed length, But I would need to >>>> guess the uncompressed length to allocate the buffer. If the >>>> decompressing fails with insufficient space, I need to have another >>>> guess and do it again. I think it's easier just save the uncompressed >>>> size to the struct. I think combine the two sizes into one >>>> gcov_unsigned_t is a good idea. We don't expect the string size are >>>> very big. >>> >>> What is the usual compression ratio? If you guess it right most of the >>> time, just get rid of the uncompressed length. >>> >>> David >>> >>>>> >>>>> >>>>> David >>>>> >>>>> On Mon, Oct 5, 2015 at 3:51 PM, Rong Xu <x...@google.com> wrote: >>>>>> Hi, >>>>>> >>>>>> This patch is for google branch only. >>>>>> >>>>>> It encodes and compresses various cc1 option strings in >>>>>> gcov_module_info to reduce the lipo instrumented object size. The >>>>>> savings are from string compression and the reduced number of >>>>>> relocations. >>>>>> >>>>>> More specifically, we replace the following fields in gcov_module_info >>>>>> gcov_unsigned_t num_quote_paths; >>>>>> gcov_unsigned_t num_bracket_paths; >>>>>> gcov_unsigned_t num_system_paths; >>>>>> gcov_unsigned_t num_cpp_defines; >>>>>> gcov_unsigned_t num_cpp_includes; >>>>>> gcov_unsigned_t num_cl_args; >>>>>> char *string_array[1]; >>>>>> with >>>>>> gcov_unsigned_t cc1_strlen; >>>>>> gcov_unsigned_t cc1_uncompressed_strlen; >>>>>> char *saved_cc1_strings; >>>>>> >>>>>> The new saved_cc1_strings are zlib compressed string. >>>>>> >>>>>> Tested with google internal benchmarks. >>>>>> >>>>>> Thanks, >>>>>> >>>>>> -Rong