Again, this time with the patch attached... On 08/04/18 17:43, Bernd Edlinger wrote: > Hi! > > > This patch is inspired by Olivier's feedback to my previous patch on the > zero-termination of Ada STRING_CST. > > The idea is that strings that do not have embedded nul characters _and_ > do not happen to be zero-terminated within the DECL_SIZE_UNIT are currently > not in the merge string sections. To be in the merge string section > they need a terminating nul character, which gets written directly > in varasm while assembling the string constant. This patch uses > the new string properties that my previous patch series implements, > and is based on the other patches here: > > [PATCH] Check the STRING_CSTs in varasm.c > https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00331.html > > [PATCH] Handle overlength strings in the C FE > https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00142.html > > [PATCH] Handle overlength strings in C++ FE > https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00045.html > Approved: https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00105.html > > [PATCH] Make GO string literals properly NUL terminated > https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01931.html > > [PATCH] [Ada] Make middle-end string literals NUL terminated > https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01929.html > > > The existing test case gcc.dg/merge-all-constants-1.c > contains two overlength strings that now get stripped down > by the C FE, and look in the middle end exactly like normal > Ada, Fortran or Go strings. They now get allocated > in the merge.str section, unless they have an embedded > nul character, so I changed the test strings to contain one to make the test pass again. > > Olivier, could you add test cases from the Ada side to this? > > > Bootstrapped and reg-tested on x86_64-pc-linux-gnu. > Is it OK for trunk (after all pre-condition patches > are approved/committed)? > > > Thanks > Bernd.
gcc: 2018-08-04 Bernd Edlinger <bernd.edlin...@hotmail.de>
* varasm.c (output_constant): Add new parameter merge_strings. (assemble_variable_contents): Likewise. (mergeable_string_section): Don't fail if the last char is non-zero. (assemble_variable): Pass merge_strings for string merge sections. (redundant_nul_term_p): New helper function. (output_constant): Make strings properly zero terminated in merge string sections. testsuite: 2018-08-04 Bernd Edlinger <bernd.edlin...@hotmail.de> * gcc.dg/merge-all-constants-1.c: Adjust test. Index: gcc/varasm.c =================================================================== --- gcc/varasm.c (revision 263072) +++ gcc/varasm.c (working copy) @@ -111,7 +111,8 @@ static int compare_constant (const tree, const tre static void output_constant_def_contents (rtx); static void output_addressed_constants (tree); static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, - unsigned int, bool); + unsigned int, bool, + bool = false); static void globalize_decl (tree); static bool decl_readonly_section_1 (enum section_category); #ifdef BSS_SECTION_ASM_OP @@ -827,7 +828,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUS unit = GET_MODE_SIZE (mode); /* Check for embedded NUL characters. */ - for (i = 0; i < len; i += unit) + for (i = 0; i < len - unit; i += unit) { for (j = 0; j < unit; j++) if (str[i + j] != '\0') @@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, const char static void assemble_variable_contents (tree decl, const char *name, - bool dont_output_data) + bool dont_output_data, bool merge_strings = false) { /* Do any machine/system dependent processing of the object. */ #ifdef ASM_DECLARE_OBJECT_NAME @@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, const char output_constant (DECL_INITIAL (decl), tree_to_uhwi (DECL_SIZE_UNIT (decl)), get_variable_align (decl), - false); + false, merge_strings); else /* Leave space for it. 
*/ assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl))); @@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_level ATTRIB switch_to_section (sect); if (align > BITS_PER_UNIT) ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); - assemble_variable_contents (decl, name, dont_output_data); + assemble_variable_contents (decl, name, dont_output_data, + sect->common.flags & SECTION_MERGE + && sect->common.flags & SECTION_STRINGS); if (asan_protected) { unsigned HOST_WIDE_INT int size @@ -4786,6 +4789,19 @@ static unsigned HOST_WIDE_INT output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int, bool, oc_outer_state *); +/* Find out if a string P of length LEN and character size UNIT + has double nul termination. + The string is already known to have a nul termination at + offset LEN. Find out if there is already a NUL termination + at offset LEN - UNIT. */ + +static bool +redundant_nul_term_p (const char *p, unsigned len, unsigned unit) +{ + gcc_assert(len >= unit); + return !memcmp (p + len, p + len - unit, unit); +} + /* Output assembler code for constant EXP, with no label. This includes the pseudo-op such as ".int" or ".byte", and a newline. Assumes output_addressed_constants has been done on EXP already. 
@@ -4812,7 +4828,7 @@ output_constructor (tree, unsigned HOST_WIDE_INT, static unsigned HOST_WIDE_INT output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align, - bool reverse) + bool reverse, bool merge_strings /* = false */) { enum tree_code code; unsigned HOST_WIDE_INT thissize; @@ -4940,8 +4956,13 @@ output_constant (tree exp, unsigned HOST_WIDE_INT case CONSTRUCTOR: return output_constructor (exp, size, align, reverse, NULL); case STRING_CST: - thissize - = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size); + thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp); + if (thissize > size && merge_strings + && !redundant_nul_term_p (TREE_STRING_POINTER (exp), + size, thissize - size)) + ; + else + thissize = MIN (thissize, size); gcc_checking_assert (check_string_literal (exp, thissize)); assemble_string (TREE_STRING_POINTER (exp), thissize); break; case VECTOR_CST: Index: gcc/testsuite/gcc.dg/merge-all-constants-1.c =================================================================== --- gcc/testsuite/gcc.dg/merge-all-constants-1.c (revision 263072) +++ gcc/testsuite/gcc.dg/merge-all-constants-1.c (working copy) @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-w -O2 -fmerge-all-constants" } */ -const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str1[36] = "\000123456789abcdefghijklmnopqrstuvwxyz"; const char str2[38] = "0123456789abcdefghijklmnopqrstuvwxyz"; -const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str3[10] = "\000123456789abcdefghijklmnopqrstuvwxyz"; /* { dg-final { scan-assembler-not "\.rodata\.str" } } */