This patch fixes PR71656 by adding support to rs6000_legitimize_reload_address for POWER9 vector-dform addresses. Previously, -mpower9-dform-vector was disabled when using reload due to this bug and it was not added to the default ISA 3.0 flags. Now that -mpower9-dform-vector works with reload, I have added it to the default ISA 3.0 flags and removed the disabling of the option when using -mno-lra. I also updated rs6000_legitimate_address_p so that it too correctly handles quad addresses.
This has bootstrapped and regtested with no regessions on powerpc64le-linux. Ok for for trunk? This bug also affects the FSF 6 branch. Ok for that branch after the patch has burned in a while on trunk and after the usual bootstrap and regtesting? Peter gcc/ PR target/71656 * config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Add OPTION_MASK_P9_DFORM_VECTOR. * config/rs6000/rs6000.c (rs6000_option_override_internal): Do not disable -mpower9-dform-vector when using reload. (quad_address_p): Remove 'gpr_p' argument and all associated code. New 'strict' argument. Update all callers. Add strict addressing support. (rs6000_legitimate_offset_address_p): Remove call to virtual_stack_registers_memory_p. (rs6000_legitimize_reload_address): Add quad address support. (rs6000_legitimate_address_p): Move call to quad_address_p above call to virtual_stack_registers_memory_p. Adjust quad_address_p args to account for new strict usage. (rs6000_output_move_128bit): Adjust quad_address_p args to account for new strict usage. * config/rs6000/predicates.md (quad_memory_operand): Likewise. gcc/testsuite/ PR target/71656 * gcc.target/powerpc/pr71656-1.c: New test. * gcc.target/powerpc/pr71656-2.c: New test. Index: gcc/config/rs6000/rs6000-cpus.def =================================================================== --- gcc/config/rs6000/rs6000-cpus.def (revision 237765) +++ gcc/config/rs6000/rs6000-cpus.def (working copy) @@ -61,14 +61,14 @@ | OPTION_MASK_UPPER_REGS_SF) /* Add ISEL back into ISA 3.0, since it is supposed to be a win. Do not add - P9_MINMAX until the hardware that supports it is available. Do not add - P9_DFORM_VECTOR until LRA is the default register allocator. */ + P9_MINMAX until the hardware that supports it is available. */ #define ISA_3_0_MASKS_SERVER (ISA_2_7_MASKS_SERVER \ | OPTION_MASK_FLOAT128_HW \ | OPTION_MASK_ISEL \ | OPTION_MASK_MODULO \ | OPTION_MASK_P9_FUSION \ | OPTION_MASK_P9_DFORM_SCALAR \ + | OPTION_MASK_P9_DFORM_VECTOR \ | OPTION_MASK_P9_VECTOR) #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 237765) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -4266,13 +4266,10 @@ rs6000_option_override_internal (bool gl rs6000_isa_flags |= OPTION_MASK_TOC_FUSION; /* -mpower9-dform turns on both -mpower9-dform-scalar and - -mpower9-dform-vector. There are currently problems if - -mpower9-dform-vector instructions are enabled when we use the RELOAD - register allocator. */ + -mpower9-dform-vector. */ if (TARGET_P9_DFORM_BOTH > 0) { - if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) - && TARGET_LRA) + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)) rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR; if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)) @@ -4318,11 +4315,10 @@ rs6000_option_override_internal (bool gl rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR; } - /* There have been bugs with both -mvsx-timode and -mpower9-dform-vector that - don't show up with -mlra, but do show up with -mno-lra. Given -mlra will - become the default once PR 69847 is fixed, turn off the options with - problems by default if -mno-lra was used, and warn if the user explicitly - asked for the option. + /* There have been bugs with -mvsx-timode that don't show up with -mlra, + but do show up with -mno-lra. Given -mlra will become the default once + PR 69847 is fixed, turn off the options with problems by default if + -mno-lra was used, and warn if the user explicitly asked for the option. Enable -mpower9-dform-vector by default if LRA and other power9 options. Enable -mvsx-timode by default if LRA and VSX. */ @@ -4336,15 +4332,6 @@ rs6000_option_override_internal (bool gl else rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; } - - if (TARGET_P9_DFORM_VECTOR) - { - if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) != 0) - warning (0, "-mpower9-dform-vector might need -mlra"); - - else - rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR; - } } else @@ -4352,11 +4339,6 @@ rs6000_option_override_internal (bool gl if (TARGET_VSX && !TARGET_VSX_TIMODE && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0) rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE; - - if (TARGET_VSX && TARGET_P9_VECTOR && !TARGET_P9_DFORM_VECTOR - && TARGET_P9_DFORM_SCALAR && TARGET_P9_DFORM_BOTH < 0 - && (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) == 0) - rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR; } /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07 @@ -7243,34 +7225,26 @@ quad_address_offset_p (HOST_WIDE_INT off 3.0 LXV/STXV instruction. */ bool -quad_address_p (rtx addr, machine_mode mode, bool gpr_p) +quad_address_p (rtx addr, machine_mode mode, bool strict) { rtx op0, op1; if (GET_MODE_SIZE (mode) != 16) return false; - if (gpr_p) - { - if (!TARGET_QUAD_MEMORY && !TARGET_SYNC_TI) - return false; - - /* LQ/STQ can handle indirect addresses. */ - if (base_reg_operand (addr, Pmode)) - return true; - } + if (legitimate_indirect_address_p (addr, strict)) + return true; - else - { - if (!mode_supports_vsx_dform_quad (mode)) - return false; - } + if (VECTOR_MODE_P (mode) + && !mode_supports_vsx_dform_quad (mode)) + return false; if (GET_CODE (addr) != PLUS) return false; op0 = XEXP (addr, 0); - if (!base_reg_operand (op0, Pmode)) + if (!REG_P (op0) + || !INT_REG_OK_FOR_BASE_P (op0, strict)) return false; op1 = XEXP (addr, 1); @@ -7639,8 +7613,7 @@ rs6000_legitimate_offset_address_p (mach if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) return false; if (mode_supports_vsx_dform_quad (mode)) - return (virtual_stack_registers_memory_p (x) - || quad_address_p (x, mode, false)); + return quad_address_p (x, mode, strict); if (!reg_offset_addressing_ok_p (mode)) return virtual_stack_registers_memory_p (x); if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress)) @@ -8543,6 +8516,7 @@ rs6000_legitimize_reload_address (rtx x, int ind_levels ATTRIBUTE_UNUSED, int *win) { bool reg_offset_p = reg_offset_addressing_ok_p (mode); + bool quad_offset_p = mode_supports_vsx_dform_quad (mode); /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */ @@ -8612,6 +8586,7 @@ rs6000_legitimize_reload_address (rtx x, if (TARGET_CMODEL != CMODEL_SMALL && reg_offset_p + && !quad_offset_p && small_toc_ref (x, VOIDmode)) { rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x)); @@ -8629,22 +8604,24 @@ rs6000_legitimize_reload_address (rtx x, } if (GET_CODE (x) == PLUS - && GET_CODE (XEXP (x, 0)) == REG + && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1) - && GET_CODE (XEXP (x, 1)) == CONST_INT + && CONST_INT_P (XEXP (x, 1)) && reg_offset_p && !SPE_VECTOR_MODE (mode) && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD) - && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))) + && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))) { HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; HOST_WIDE_INT high = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; - /* Check for 32-bit overflow. */ - if (high + low != val) + /* Check for 32-bit overflow or quad addresses with one of the + four least significant bits set. */ + if (high + low != val + || (quad_offset_p && (low & 0xf))) { *win = 0; return x; @@ -8672,6 +8649,7 @@ rs6000_legitimize_reload_address (rtx x, if (GET_CODE (x) == SYMBOL_REF && reg_offset_p + && !quad_offset_p && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)) && !SPE_VECTOR_MODE (mode) #if TARGET_MACHO @@ -8756,6 +8734,7 @@ rs6000_legitimize_reload_address (rtx x, if (TARGET_TOC && reg_offset_p + && !quad_offset_p && GET_CODE (x) == SYMBOL_REF && use_toc_relative_ref (x, mode)) { @@ -8844,15 +8823,14 @@ rs6000_legitimate_address_p (machine_mod && mode_supports_pre_incdec_p (mode) && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)) return 1; - if (virtual_stack_registers_memory_p (x)) - return 1; - /* Handle restricted vector d-form offsets in ISA 3.0. */ if (quad_offset_p) { - if (quad_address_p (x, mode, false)) + if (quad_address_p (x, mode, reg_ok_strict)) return 1; } + else if (virtual_stack_registers_memory_p (x)) + return 1; else if (reg_offset_p) { @@ -20393,7 +20371,7 @@ rs6000_output_move_128bit (rtx operands[ else if (TARGET_VSX && dest_vsx_p) { if (mode_supports_vsx_dform_quad (mode) - && quad_address_p (XEXP (src, 0), mode, false)) + && quad_address_p (XEXP (src, 0), mode, true)) return "lxv %x0,%1"; else if (TARGET_P9_VECTOR) @@ -20431,7 +20409,7 @@ rs6000_output_move_128bit (rtx operands[ else if (TARGET_VSX && src_vsx_p) { if (mode_supports_vsx_dform_quad (mode) - && quad_address_p (XEXP (dest, 0), mode, false)) + && quad_address_p (XEXP (dest, 0), mode, true)) return "stxv %x1,%0"; else if (TARGET_P9_VECTOR) Index: gcc/config/rs6000/predicates.md =================================================================== --- gcc/config/rs6000/predicates.md (revision 237765) +++ gcc/config/rs6000/predicates.md (working copy) @@ -740,7 +740,7 @@ (define_predicate "quad_memory_operand" if (GET_MODE_SIZE (mode) != 16 || !MEM_P (op) || MEM_ALIGN (op) < 128) return false; - return quad_address_p (XEXP (op, 0), mode, true); + return quad_address_p (XEXP (op, 0), mode, false); }) ;; Return 1 if the operand is suitable for load/store to vector registers with Index: gcc/testsuite/gcc.target/powerpc/pr71656-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr71656-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr71656-1.c (working copy) @@ -0,0 +1,20 @@ +/* Test for reload ICE arising from POWER9 Vector Dform code generation. */ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-O1 -mcpu=power9 -mpower9-dform-vector -mno-lra" } */ + +typedef __attribute__((altivec(vector__))) int type_t; +type_t +func (type_t *src) +{ + asm volatile ("# force the base reg on the load below to be spilled" + : /* no outputs */ + : /* no inputs */ + : "r0", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"); + return src[1]; +} + Index: gcc/testsuite/gcc.target/powerpc/pr71656-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr71656-2.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr71656-2.c (working copy) @@ -0,0 +1,47 @@ +/* Test for reload ICE arising from POWER9 Vector Dform code generation. */ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-O3 -mcpu=power9 -mpower9-dform-vector -mno-lra -funroll-loops -fno-aggressive-loop-optimizations" } */ + +typedef double vec[3]; +struct vec_t +{ + vec x; + vec y; +}; +int a, j, k, l, m, n, o, p, q; +double b, i; +vec c; +double h[6]; +void func1 (vec); + +void +func2 (double *) +{ + for (; k; k--) + for (; j <= k;) + for (; m <= q; m++) + for (; n <= k; n++) + for (; o <= l; o++) + { + j = p + m + n + o; + h[j] = i; + } +} + +void +func3 (void) +{ + vec_t d; + func1 (d.y); + func2 (&b); + for (; a;) + { + double *e = d.y, *g; + double f; + c[0] = g[0] + f * e[0]; + c[1] = g[1] + f * e[1]; + func1 (c); + } +}