Again, this time with the patch included.

On 08/04/18 17:43, Bernd Edlinger wrote:
> Hi!
> 
> 
> This patch is inspired by Olivier's feedback to my previous patch on the
> zero-termination of Ada STRING_CST.
> 
> The idea is that strings that do not have embedded nul characters _and_
> do not happen to be zero-terminated within their DECL_SIZE_UNIT are
> currently not placed in the merge string sections.  To be in the merge
> string section they need a terminating nul character, which now gets
> written directly in varasm while assembling the string constant.  This
> patch uses the new string properties that my previous patch series
> implements, and is based on the other patches here:
> 
> [PATCH] Check the STRING_CSTs in varasm.c
> https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00331.html
> 
> [PATCH] Handle overlength strings in the C FE
> https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00142.html
> 
> [PATCH] Handle overlength strings in C++ FE
> https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00045.html
> Approved: https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00105.html
> 
> [PATCH] Make GO string literals properly NUL terminated
> https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01931.html
> 
> [PATCH] [Ada] Make middle-end string literals NUL terminated
> https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01929.html
> 
> 
> The existing test case gcc.dg/merge-all-constants-1.c
> contains two overlength strings that now get stripped down
> by the C FE, and look in the middle end exactly like normal
> Ada, Fortran or Go strings.  They now get allocated
> in the merge.str section unless they have an embedded
> nul character, so I added one to each of them to make the test pass again.
> 
> Olivier, could you add test cases from the Ada side to this?
> 
> 
> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> Is it OK for trunk (after all prerequisite patches
> are approved/committed)?
> 
> 
> Thanks
> Bernd.
gcc:
2018-08-04  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	* varasm.c (output_constant): Add new parameter merge_strings.
	(assemble_variable_contents): Likewise.
	(mergeable_string_section): Don't fail if the last char is non-zero.
	(assemble_variable): Pass merge_strings for string merge sections.
	(redundant_nul_term_p): New helper function.
	(output_constant): Make strings properly zero terminated in merge
	string sections.

testsuite:
2018-08-04  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	* gcc.dg/merge-all-constants-1.c: Adjust test.

Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c	(revision 263072)
+++ gcc/varasm.c	(working copy)
@@ -111,7 +111,8 @@ static int compare_constant (const tree, const tre
 static void output_constant_def_contents (rtx);
 static void output_addressed_constants (tree);
 static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT,
-					       unsigned int, bool);
+					       unsigned int, bool,
+					       bool = false);
 static void globalize_decl (tree);
 static bool decl_readonly_section_1 (enum section_category);
 #ifdef BSS_SECTION_ASM_OP
@@ -827,7 +828,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUS
 	  unit = GET_MODE_SIZE (mode);
 
 	  /* Check for embedded NUL characters.  */
-	  for (i = 0; i < len; i += unit)
+	  for (i = 0; i < len - unit; i += unit)
 	    {
 	      for (j = 0; j < unit; j++)
 		if (str[i + j] != '\0')
@@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, const char
 
 static void
 assemble_variable_contents (tree decl, const char *name,
-			    bool dont_output_data)
+			    bool dont_output_data, bool merge_strings = false)
 {
   /* Do any machine/system dependent processing of the object.  */
 #ifdef ASM_DECLARE_OBJECT_NAME
@@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, const char
 	output_constant (DECL_INITIAL (decl),
 			 tree_to_uhwi (DECL_SIZE_UNIT (decl)),
 			 get_variable_align (decl),
-			 false);
+			 false, merge_strings);
       else
 	/* Leave space for it.  */
 	assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl)));
@@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_level ATTRIB
 	switch_to_section (sect);
       if (align > BITS_PER_UNIT)
 	ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
-      assemble_variable_contents (decl, name, dont_output_data);
+      assemble_variable_contents (decl, name, dont_output_data,
+				  sect->common.flags & SECTION_MERGE
+				  && sect->common.flags & SECTION_STRINGS);
       if (asan_protected)
 	{
 	  unsigned HOST_WIDE_INT int size
@@ -4786,6 +4789,19 @@ static unsigned HOST_WIDE_INT
 output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int, bool,
 		    oc_outer_state *);
 
+/* Find out if a string P of length LEN and character size UNIT
+   has double nul termination.
+   The string is already known to have a nul termination at
+   offset LEN.  Find out if there is already a NUL termination
+   at offset LEN - UNIT.  */
+
+static bool
+redundant_nul_term_p (const char *p, unsigned len, unsigned unit)
+{
+  gcc_assert(len >= unit);
+  return !memcmp (p + len, p + len - unit, unit);
+}
+
 /* Output assembler code for constant EXP, with no label.
    This includes the pseudo-op such as ".int" or ".byte", and a newline.
    Assumes output_addressed_constants has been done on EXP already.
@@ -4812,7 +4828,7 @@ output_constructor (tree, unsigned HOST_WIDE_INT,
 
 static unsigned HOST_WIDE_INT
 output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align,
-		 bool reverse)
+		 bool reverse, bool merge_strings /* = false */)
 {
   enum tree_code code;
   unsigned HOST_WIDE_INT thissize;
@@ -4940,8 +4956,13 @@ output_constant (tree exp, unsigned HOST_WIDE_INT
 	case CONSTRUCTOR:
 	  return output_constructor (exp, size, align, reverse, NULL);
 	case STRING_CST:
-	  thissize
-	    = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size);
+	  thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp);
+	  if (thissize > size && merge_strings
+	      && !redundant_nul_term_p (TREE_STRING_POINTER (exp),
+					size, thissize - size))
+	    ;
+ 	  else
+	    thissize = MIN (thissize, size);
 	  gcc_checking_assert (check_string_literal (exp, thissize));
 	  assemble_string (TREE_STRING_POINTER (exp), thissize);
 	  break;
 	case VECTOR_CST:
Index: gcc/testsuite/gcc.dg/merge-all-constants-1.c
===================================================================
--- gcc/testsuite/gcc.dg/merge-all-constants-1.c	(revision 263072)
+++ gcc/testsuite/gcc.dg/merge-all-constants-1.c	(working copy)
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-w -O2 -fmerge-all-constants" } */
 
-const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str1[36] = "\000123456789abcdefghijklmnopqrstuvwxyz";
 const char str2[38] = "0123456789abcdefghijklmnopqrstuvwxyz";
-const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str3[10] = "\000123456789abcdefghijklmnopqrstuvwxyz";
 
 /* { dg-final { scan-assembler-not "\.rodata\.str" } } */

Reply via email to