[PATCH][Aarch64][gcc] Fix vld2/3/4 on big endian systems

Tamar Christina Thu, 18 Aug 2016 02:15:59 -0700

Hi all,

This fixes a bug in the vector load functions (vld2/vld3/vld4), which
load the vector in the wrong order on big-endian systems. The patch
conditionally flips the operand order in the vec_concats.


No new testcase is included because there are plenty of existing tests
for the vld functions.
Ran regression tests on aarch64_be-none-elf and aarch64-none-elf.
The vldx tests now pass on aarch64_be-none-elf, and there are no
regressions on either target.

Ok for trunk?

I do not have commit rights, so if it is OK, could someone apply it for me?

Thanks,
Tamar

gcc/
2016-08-16  Tamar Christina  <tamar.christ...@arm.com>

        * gcc/config/aarch64/aarch64-simd.md
        (aarch64_ld2<mode>_dreg_le): New.
        (aarch64_ld2<mode>_dreg_be): New.
        (aarch64_ld2<mode>_dreg): Removed.
        (aarch64_ld3<mode>_dreg_le): New.
        (aarch64_ld3<mode>_dreg_be): New.
        (aarch64_ld3<mode>_dreg): Removed.
        (aarch64_ld4<mode>_dreg_le): New.
        (aarch64_ld4<mode>_dreg_be): New.
        (aarch64_ld4<mode>_dreg): Removed.
        (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Wrapper around _le, _be.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index feb5e96..7d3dfe8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4861,7 +4861,7 @@
   DONE;
 })
 
-(define_insn "aarch64_ld2<mode>_dreg"
+(define_insn "aarch64_ld2<mode>_dreg_le"
   [(set (match_operand:OI 0 "register_operand" "=w")
 	(subreg:OI
 	  (vec_concat:<VRL2>
@@ -4874,12 +4874,30 @@
 	     (unspec:VD [(match_dup 1)]
 			UNSPEC_LD2)
 	     (vec_duplicate:VD (const_int 0)))) 0))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
   [(set_attr "type" "neon_load2_2reg<q>")]
 )
 
-(define_insn "aarch64_ld2<mode>_dreg"
+(define_insn "aarch64_ld2<mode>_dreg_be"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+	(subreg:OI
+	  (vec_concat:<VRL2>
+	    (vec_concat:<VDBL>
+	     (vec_duplicate:VD (const_int 0))
+	     (unspec:VD
+		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
+		UNSPEC_LD2))
+	    (vec_concat:<VDBL>
+	     (vec_duplicate:VD (const_int 0))
+	     (unspec:VD [(match_dup 1)]
+			UNSPEC_LD2))) 0))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load2_2reg<q>")]
+)
+
+(define_insn "aarch64_ld2<mode>_dreg_le"
   [(set (match_operand:OI 0 "register_operand" "=w")
 	(subreg:OI
 	  (vec_concat:<VRL2>
@@ -4892,12 +4910,30 @@
 	     (unspec:DX [(match_dup 1)]
 			UNSPEC_LD2)
 	     (const_int 0))) 0))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "ld1\\t{%S0.1d - %T0.1d}, %1"
+  [(set_attr "type" "neon_load1_2reg<q>")]
+)
+
+(define_insn "aarch64_ld2<mode>_dreg_be"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+	(subreg:OI
+	  (vec_concat:<VRL2>
+	    (vec_concat:<VDBL>
+	     (const_int 0)
+	     (unspec:DX
+		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
+		UNSPEC_LD2))
+	    (vec_concat:<VDBL>
+	     (const_int 0)
+	     (unspec:DX [(match_dup 1)]
+			UNSPEC_LD2))) 0))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "ld1\\t{%S0.1d - %T0.1d}, %1"
   [(set_attr "type" "neon_load1_2reg<q>")]
 )
 
-(define_insn "aarch64_ld3<mode>_dreg"
+(define_insn "aarch64_ld3<mode>_dreg_le"
   [(set (match_operand:CI 0 "register_operand" "=w")
 	(subreg:CI
 	 (vec_concat:<VRL3>
@@ -4915,12 +4951,35 @@
 	     (unspec:VD [(match_dup 1)]
 			UNSPEC_LD3)
 	     (vec_duplicate:VD (const_int 0)))) 0))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
   [(set_attr "type" "neon_load3_3reg<q>")]
 )
 
-(define_insn "aarch64_ld3<mode>_dreg"
+(define_insn "aarch64_ld3<mode>_dreg_be"
+  [(set (match_operand:CI 0 "register_operand" "=w")
+	(subreg:CI
+	 (vec_concat:<VRL3>
+	  (vec_concat:<VRL2>
+	    (vec_concat:<VDBL>
+	     (vec_duplicate:VD (const_int 0))
+	     (unspec:VD
+		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
+		UNSPEC_LD3))
+	    (vec_concat:<VDBL>
+	     (vec_duplicate:VD (const_int 0))
+	     (unspec:VD [(match_dup 1)]
+			UNSPEC_LD3)))
+	  (vec_concat:<VDBL>
+	     (vec_duplicate:VD (const_int 0))
+	     (unspec:VD [(match_dup 1)]
+			UNSPEC_LD3))) 0))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load3_3reg<q>")]
+)
+
+(define_insn "aarch64_ld3<mode>_dreg_le"
   [(set (match_operand:CI 0 "register_operand" "=w")
 	(subreg:CI
 	 (vec_concat:<VRL3>
@@ -4938,12 +4997,35 @@
 	     (unspec:DX [(match_dup 1)]
 			UNSPEC_LD3)
 	     (const_int 0))) 0))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "ld1\\t{%S0.1d - %U0.1d}, %1"
+  [(set_attr "type" "neon_load1_3reg<q>")]
+)
+
+(define_insn "aarch64_ld3<mode>_dreg_be"
+  [(set (match_operand:CI 0 "register_operand" "=w")
+	(subreg:CI
+	 (vec_concat:<VRL3>
+	  (vec_concat:<VRL2>
+	    (vec_concat:<VDBL>
+	     (const_int 0)
+	     (unspec:DX
+		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
+		UNSPEC_LD3))
+	    (vec_concat:<VDBL>
+	     (const_int 0)
+	     (unspec:DX [(match_dup 1)]
+			UNSPEC_LD3)))
+	  (vec_concat:<VDBL>
+	     (const_int 0)
+	     (unspec:DX [(match_dup 1)]
+			UNSPEC_LD3))) 0))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "ld1\\t{%S0.1d - %U0.1d}, %1"
   [(set_attr "type" "neon_load1_3reg<q>")]
 )
 
-(define_insn "aarch64_ld4<mode>_dreg"
+(define_insn "aarch64_ld4<mode>_dreg_le"
   [(set (match_operand:XI 0 "register_operand" "=w")
 	(subreg:XI
 	 (vec_concat:<VRL4>
@@ -4954,9 +5036,9 @@
 		UNSPEC_LD4)
 	       (vec_duplicate:VD (const_int 0)))
 	      (vec_concat:<VDBL>
-	        (unspec:VD [(match_dup 1)]
+		(unspec:VD [(match_dup 1)]
 			UNSPEC_LD4)
-	        (vec_duplicate:VD (const_int 0))))
+		(vec_duplicate:VD (const_int 0))))
 	   (vec_concat:<VRL2>
 	     (vec_concat:<VDBL>
 	       (unspec:VD [(match_dup 1)]
@@ -4966,12 +5048,40 @@
 	       (unspec:VD [(match_dup 1)]
 			UNSPEC_LD4)
 	       (vec_duplicate:VD (const_int 0))))) 0))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load4_4reg<q>")]
+)
+
+(define_insn "aarch64_ld4<mode>_dreg_be"
+  [(set (match_operand:XI 0 "register_operand" "=w")
+	(subreg:XI
+	 (vec_concat:<VRL4>
+	   (vec_concat:<VRL2>
+	     (vec_concat:<VDBL>
+	       (vec_duplicate:VD (const_int 0))
+	       (unspec:VD
+		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
+		UNSPEC_LD4))
+	      (vec_concat:<VDBL>
+		(vec_duplicate:VD (const_int 0))
+		(unspec:VD [(match_dup 1)]
+			UNSPEC_LD4)))
+	   (vec_concat:<VRL2>
+	     (vec_concat:<VDBL>
+	       (vec_duplicate:VD (const_int 0))
+	       (unspec:VD [(match_dup 1)]
+			UNSPEC_LD4))
+	     (vec_concat:<VDBL>
+	       (vec_duplicate:VD (const_int 0))
+	       (unspec:VD [(match_dup 1)]
+			UNSPEC_LD4)))) 0))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
   [(set_attr "type" "neon_load4_4reg<q>")]
 )
 
-(define_insn "aarch64_ld4<mode>_dreg"
+(define_insn "aarch64_ld4<mode>_dreg_le"
   [(set (match_operand:XI 0 "register_operand" "=w")
 	(subreg:XI
 	 (vec_concat:<VRL4>
@@ -4984,7 +5094,7 @@
 	      (vec_concat:<VDBL>
 	        (unspec:DX [(match_dup 1)]
 			UNSPEC_LD4)
-	        (const_int 0)))
+		(const_int 0)))
 	   (vec_concat:<VRL2>
 	     (vec_concat:<VDBL>
 	       (unspec:DX [(match_dup 1)]
@@ -4994,7 +5104,35 @@
 	       (unspec:DX [(match_dup 1)]
 			UNSPEC_LD4)
 	       (const_int 0)))) 0))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "ld1\\t{%S0.1d - %V0.1d}, %1"
+  [(set_attr "type" "neon_load1_4reg<q>")]
+)
+
+(define_insn "aarch64_ld4<mode>_dreg_be"
+  [(set (match_operand:XI 0 "register_operand" "=w")
+	(subreg:XI
+	 (vec_concat:<VRL4>
+	   (vec_concat:<VRL2>
+	     (vec_concat:<VDBL>
+	       (const_int 0)
+	       (unspec:DX
+		[(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
+		UNSPEC_LD4))
+	      (vec_concat:<VDBL>
+		(const_int 0)
+		(unspec:DX [(match_dup 1)]
+			UNSPEC_LD4)))
+	   (vec_concat:<VRL2>
+	     (vec_concat:<VDBL>
+	       (const_int 0)
+	       (unspec:DX [(match_dup 1)]
+			UNSPEC_LD4))
+	     (vec_concat:<VDBL>
+	       (const_int 0)
+	       (unspec:DX [(match_dup 1)]
+			UNSPEC_LD4)))) 0))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "ld1\\t{%S0.1d - %V0.1d}, %1"
   [(set_attr "type" "neon_load1_4reg<q>")]
 )
@@ -5008,7 +5146,10 @@
   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
   set_mem_size (mem, <VSTRUCT:nregs> * 8);
 
-  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
+   if (BYTES_BIG_ENDIAN)
+	emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_be (operands[0], mem));
+   else
+	emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_le (operands[0], mem));
   DONE;
 })

[PATCH][Aarch64][gcc] Fix vld2/3/4 on big endian systems

Reply via email to