Hi, I realized that the previous version did not handle zero terminated Ada constants correctly as in
$ cat hello.adb procedure Hello is procedure puts (S : String) with Import, Convention => C; X : String(1..8) := "abcdefg" & Ascii.Nul; begin puts ("1234" & Ascii.Nul ); puts (X); end; accidentally strings were doubly nul-terminated, because I forgot to handle merge string sections in assemble_constant_contents, and because get_constant_size increased the string literal by one. Fixed, and added a positive test case for the merging non-zero terminated strings in C. Boot-strapped and regtested on x86_64-pc-linux-gnu. Is it OK for trunk (after pre-condition patches)? Thanks Bernd.
gcc: 2018-08-04 Bernd Edlinger <bernd.edlin...@hotmail.de> * varasm.c (output_constant): Add new parameter merge_strings. Make strings properly zero terminated in merge string sections. (mergeable_string_section): Don't fail if the last char is non-zero. (assemble_variable_contents): Handle merge string sections. (assemble_variable): Likewise. (assemble_constant_contents): Likewise. (output_constant_def_contents): Likewise. (get_constant_size): Remove special handling of STRING_CSTs. (redundant_nul_term_p): New helper function. testsuite: 2018-08-04 Bernd Edlinger <bernd.edlin...@hotmail.de> * gcc.dg/merge-all-constants-1.c: Adjust test. * gcc.dg/merge-all-constants-2.c: New test. Index: gcc/varasm.c =================================================================== --- gcc/varasm.c (revision 263072) +++ gcc/varasm.c (working copy) @@ -111,7 +111,8 @@ static int compare_constant (const tree, const tre static void output_constant_def_contents (rtx); static void output_addressed_constants (tree); static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, - unsigned int, bool); + unsigned int, bool, + bool = false); static void globalize_decl (tree); static bool decl_readonly_section_1 (enum section_category); #ifdef BSS_SECTION_ASM_OP @@ -827,7 +828,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUS unit = GET_MODE_SIZE (mode); /* Check for embedded NUL characters. */ - for (i = 0; i < len; i += unit) + for (i = 0; i < len - unit; i += unit) { for (j = 0; j < unit; j++) if (str[i + j] != '\0') @@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, const char static void assemble_variable_contents (tree decl, const char *name, - bool dont_output_data) + bool dont_output_data, bool merge_strings = false) { /* Do any machine/system dependent processing of the object. */ #ifdef ASM_DECLARE_OBJECT_NAME @@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, const char output_constant (DECL_INITIAL (decl), tree_to_uhwi (DECL_SIZE_UNIT (decl)), get_variable_align (decl), - false); + false, merge_strings); else /* Leave space for it. */ assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl))); @@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_level ATTRIB switch_to_section (sect); if (align > BITS_PER_UNIT) ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); - assemble_variable_contents (decl, name, dont_output_data); + assemble_variable_contents (decl, name, dont_output_data, + sect->common.flags & SECTION_MERGE + && sect->common.flags & SECTION_STRINGS); if (asan_protected) { unsigned HOST_WIDE_INT int size @@ -3300,12 +3303,7 @@ get_constant_section (tree exp, unsigned int align static HOST_WIDE_INT get_constant_size (tree exp) { - HOST_WIDE_INT size; - - size = int_size_in_bytes (TREE_TYPE (exp)); - if (TREE_CODE (exp) == STRING_CST) - size = MAX (TREE_STRING_LENGTH (exp), size); - return size; + return int_size_in_bytes (TREE_TYPE (exp)); } /* Subroutine of output_constant_def: @@ -3468,7 +3466,8 @@ maybe_output_constant_def_contents (struct constan constant's alignment in bits. */ static void -assemble_constant_contents (tree exp, const char *label, unsigned int align) +assemble_constant_contents (tree exp, const char *label, unsigned int align, + bool merge_strings = false) { HOST_WIDE_INT size; @@ -3478,7 +3477,7 @@ static void targetm.asm_out.declare_constant_name (asm_out_file, label, exp, size); /* Output the value of EXP. */ - output_constant (exp, size, align, false); + output_constant (exp, size, align, false, merge_strings); targetm.asm_out.decl_end (); } @@ -3519,10 +3518,13 @@ output_constant_def_contents (rtx symbol) || (VAR_P (decl) && DECL_IN_CONSTANT_POOL (decl)) ? DECL_ALIGN (decl) : symtab_node::get (decl)->definition_alignment ()); - switch_to_section (get_constant_section (exp, align)); + section *sect = get_constant_section (exp, align); + switch_to_section (sect); if (align > BITS_PER_UNIT) ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); - assemble_constant_contents (exp, XSTR (symbol, 0), align); + assemble_constant_contents (exp, XSTR (symbol, 0), align, + sect->common.flags & SECTION_MERGE + && sect->common.flags & SECTION_STRINGS); if (asan_protected) { HOST_WIDE_INT size = get_constant_size (exp); @@ -4786,6 +4789,19 @@ static unsigned HOST_WIDE_INT output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int, bool, oc_outer_state *); +/* Find out if a string P of length LEN and character size UNIT + has double nul termination. + The string is already known to have a nul termination at + offset LEN. Find out if there is already a NUL termination + at offset LEN - UNIT. */ + +static bool +redundant_nul_term_p (const char *p, unsigned len, unsigned unit) +{ + gcc_assert(len >= unit); + return !memcmp (p + len, p + len - unit, unit); +} + /* Output assembler code for constant EXP, with no label. This includes the pseudo-op such as ".int" or ".byte", and a newline. Assumes output_addressed_constants has been done on EXP already. @@ -4812,7 +4850,7 @@ output_constructor (tree, unsigned HOST_WIDE_INT, static unsigned HOST_WIDE_INT output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align, - bool reverse) + bool reverse, bool merge_strings /* = false */) { enum tree_code code; unsigned HOST_WIDE_INT thissize; @@ -4940,8 +4978,13 @@ output_constant (tree exp, unsigned HOST_WIDE_INT case CONSTRUCTOR: return output_constructor (exp, size, align, reverse, NULL); case STRING_CST: - thissize - = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size); + thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp); + if (thissize > size && merge_strings + && !redundant_nul_term_p (TREE_STRING_POINTER (exp), + size, thissize - size)) + ; + else + thissize = MIN (thissize, size); gcc_checking_assert (check_string_literal (exp, size)); assemble_string (TREE_STRING_POINTER (exp), thissize); break; Index: gcc/testsuite/gcc.dg/merge-all-constants-1.c =================================================================== --- gcc/testsuite/gcc.dg/merge-all-constants-1.c (revision 263072) +++ gcc/testsuite/gcc.dg/merge-all-constants-1.c (working copy) @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-w -O2 -fmerge-all-constants" } */ -const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str1[36] = "\000123456789abcdefghijklmnopqrstuvwxyz"; const char str2[38] = "0123456789abcdefghijklmnopqrstuvwxyz"; -const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str3[10] = "\000123456789abcdefghijklmnopqrstuvwxyz"; /* { dg-final { scan-assembler-not "\.rodata\.str" } } */ Index: gcc/testsuite/gcc.dg/merge-all-constants-2.c =================================================================== --- gcc/testsuite/gcc.dg/merge-all-constants-2.c (revision 0) +++ gcc/testsuite/gcc.dg/merge-all-constants-2.c (working copy) --- /dev/null 2018-07-24 12:25:16.122475518 +0200 +++ gcc/testsuite/gcc.dg/merge-all-constants-2.c 2018-08-07 10:14:35.384190377 +0200 @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-w -O2 -fmerge-all-constants" } */ + +const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str2[37] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz"; + +/* { dg-final { scan-assembler-not "\.rodata\[\n\r\]" } } */