Hi, the attached patch adds the macros F0_REGNUM ... F15_REGNUM and uses them throughout the s390.c file. The FPR numbering in the s390 backend is not obvious, and this will hopefully make it easier for me to get right.
Committed to mainline after regtesting on s390 and s390x. Bye, -Andreas- 2013-07-08 Andreas Krebbel <andreas.kreb...@de.ibm.com> * config/s390/s390.c: Rename cfun_set_fpr_bit to cfun_set_fpr_save and cfun_fpr_bit_p to cfun_fpr_save_p. (s390_frame_area, s390_register_info, s390_frame_info) (s390_emit_prologue, s390_emit_epilogue) (s390_conditional_register_usage): Use the *_REGNUM macros for FPR register numbers. * config/s390/s390.h: Define *_REGNUM macros for floating point register numbers. --- gcc/config/s390/s390.c | 106 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! gcc/config/s390/s390.h | 17 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-), 105 modifications(!) Index: gcc/config/s390/s390.c =================================================================== *** gcc/config/s390/s390.c.orig --- gcc/config/s390/s390.c *************** struct GTY(()) machine_function *** 379,388 **** #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs) #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \ cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG) ! #define cfun_set_fpr_bit(BITNUM) (cfun->machine->frame_layout.fpr_bitmap |= \ ! (1 << (BITNUM))) ! #define cfun_fpr_bit_p(BITNUM) (!!(cfun->machine->frame_layout.fpr_bitmap & \ ! (1 << (BITNUM)))) /* Number of GPRs and FPRs used for argument passing. */ #define GP_ARG_NUM_REG 5 --- 379,388 ---- #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs) #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \ cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG) ! #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \ ! (1 << (REGNO - F0_REGNUM))) ! #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \ ! (1 << (REGNO - F0_REGNUM)))) /* Number of GPRs and FPRs used for argument passing. 
*/ #define GP_ARG_NUM_REG 5 *************** static void *** 7451,7457 **** s390_frame_area (int *area_bottom, int *area_top) { int b, t; - int i; b = INT_MAX; t = INT_MIN; --- 7451,7456 ---- *************** s390_frame_area (int *area_bottom, int * *** 7472,7484 **** } if (!TARGET_64BIT) ! for (i = 2; i < 4; i++) ! if (cfun_fpr_bit_p (i)) { ! b = MIN (b, cfun_frame_layout.f4_offset + (i - 2) * 8); ! t = MAX (t, cfun_frame_layout.f4_offset + (i - 1) * 8); } ! *area_bottom = b; *area_top = t; } --- 7471,7488 ---- } if (!TARGET_64BIT) ! { ! if (cfun_fpr_save_p (F4_REGNUM)) { ! b = MIN (b, cfun_frame_layout.f4_offset); ! t = MAX (t, cfun_frame_layout.f4_offset + 8); } ! if (cfun_fpr_save_p (F6_REGNUM)) ! { ! b = MIN (b, cfun_frame_layout.f4_offset + 8); ! t = MAX (t, cfun_frame_layout.f4_offset + 16); ! } ! } *area_bottom = b; *area_top = t; } *************** s390_register_info (int clobbered_regs[] *** 7505,7511 **** cfun_frame_layout.fpr_bitmap = 0; cfun_frame_layout.high_fprs = 0; if (TARGET_64BIT) ! for (i = 24; i < 32; i++) /* During reload we have to use the df_regs_ever_live infos since reload is marking FPRs used as spill slots there as live before actually making the code changes. Without --- 7509,7515 ---- cfun_frame_layout.fpr_bitmap = 0; cfun_frame_layout.high_fprs = 0; if (TARGET_64BIT) ! for (i = F8_REGNUM; i <= F15_REGNUM; i++) /* During reload we have to use the df_regs_ever_live infos since reload is marking FPRs used as spill slots there as live before actually making the code changes. Without *************** s390_register_info (int clobbered_regs[] *** 7517,7523 **** || crtl->saves_all_registers))) && !global_regs[i]) { ! cfun_set_fpr_bit (i - 16); cfun_frame_layout.high_fprs++; } } --- 7521,7527 ---- || crtl->saves_all_registers))) && !global_regs[i]) { ! cfun_set_fpr_save (i); cfun_frame_layout.high_fprs++; } } *************** s390_register_info (int clobbered_regs[] *** 7644,7657 **** min_fpr = 0; for (i = min_fpr; i < max_fpr; i++) ! 
cfun_set_fpr_bit (i); } } if (!TARGET_64BIT) ! for (i = 2; i < 4; i++) ! if (df_regs_ever_live_p (i + 16) && !global_regs[i + 16]) ! cfun_set_fpr_bit (i); } /* Fill cfun->machine with info about frame of current function. */ --- 7648,7664 ---- min_fpr = 0; for (i = min_fpr; i < max_fpr; i++) ! cfun_set_fpr_save (i + F0_REGNUM); } } if (!TARGET_64BIT) ! { ! if (df_regs_ever_live_p (F4_REGNUM) && !global_regs[F4_REGNUM]) ! cfun_set_fpr_save (F4_REGNUM); ! if (df_regs_ever_live_p (F6_REGNUM) && !global_regs[F6_REGNUM]) ! cfun_set_fpr_save (F6_REGNUM); ! } } /* Fill cfun->machine with info about frame of current function. */ *************** s390_frame_info (void) *** 7687,7697 **** { cfun_frame_layout.f4_offset = (cfun_frame_layout.gprs_offset ! - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3))); cfun_frame_layout.f0_offset = (cfun_frame_layout.f4_offset ! - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1))); } else { --- 7694,7706 ---- { cfun_frame_layout.f4_offset = (cfun_frame_layout.gprs_offset ! - 8 * (cfun_fpr_save_p (F4_REGNUM) ! + cfun_fpr_save_p (F6_REGNUM))); cfun_frame_layout.f0_offset = (cfun_frame_layout.f4_offset ! - 8 * (cfun_fpr_save_p (F0_REGNUM) ! + cfun_fpr_save_p (F2_REGNUM))); } else { *************** s390_frame_info (void) *** 7700,7721 **** cfun_frame_layout.f0_offset = ((cfun_frame_layout.gprs_offset & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1)) ! - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1))); cfun_frame_layout.f4_offset = (cfun_frame_layout.f0_offset ! - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3))); } } else /* no backchain */ { cfun_frame_layout.f4_offset = (STACK_POINTER_OFFSET ! - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3))); cfun_frame_layout.f0_offset = (cfun_frame_layout.f4_offset ! - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1))); cfun_frame_layout.gprs_offset = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size; --- 7709,7734 ---- cfun_frame_layout.f0_offset = ((cfun_frame_layout.gprs_offset & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1)) ! 
- 8 * (cfun_fpr_save_p (F0_REGNUM) ! + cfun_fpr_save_p (F2_REGNUM))); cfun_frame_layout.f4_offset = (cfun_frame_layout.f0_offset ! - 8 * (cfun_fpr_save_p (F4_REGNUM) ! + cfun_fpr_save_p (F6_REGNUM))); } } else /* no backchain */ { cfun_frame_layout.f4_offset = (STACK_POINTER_OFFSET ! - 8 * (cfun_fpr_save_p (F4_REGNUM) ! + cfun_fpr_save_p (F6_REGNUM))); cfun_frame_layout.f0_offset = (cfun_frame_layout.f4_offset ! - 8 * (cfun_fpr_save_p (F0_REGNUM) ! + cfun_fpr_save_p (F2_REGNUM))); cfun_frame_layout.gprs_offset = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size; *************** s390_frame_info (void) *** 7747,7754 **** cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8; ! for (i = 0; i < 8; i++) ! if (cfun_fpr_bit_p (i)) cfun_frame_layout.frame_size += 8; cfun_frame_layout.frame_size += cfun_gprs_save_area_size; --- 7760,7767 ---- cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8; ! for (i = F0_REGNUM; i <= F7_REGNUM; i++) ! if (cfun_fpr_save_p (i)) cfun_frame_layout.frame_size += 8; cfun_frame_layout.frame_size += cfun_gprs_save_area_size; *************** s390_emit_prologue (void) *** 8453,8463 **** offset = cfun_frame_layout.f0_offset; /* Save f0 and f2. */ ! for (i = 0; i < 2; i++) { ! if (cfun_fpr_bit_p (i)) { ! save_fpr (stack_pointer_rtx, offset, i + 16); offset += 8; } else if (!TARGET_PACKED_STACK) --- 8466,8476 ---- offset = cfun_frame_layout.f0_offset; /* Save f0 and f2. */ ! for (i = F0_REGNUM; i <= F0_REGNUM + 1; i++) { ! if (cfun_fpr_save_p (i)) { ! save_fpr (stack_pointer_rtx, offset, i); offset += 8; } else if (!TARGET_PACKED_STACK) *************** s390_emit_prologue (void) *** 8466,8481 **** /* Save f4 and f6. */ offset = cfun_frame_layout.f4_offset; ! for (i = 2; i < 4; i++) { ! if (cfun_fpr_bit_p (i)) { ! insn = save_fpr (stack_pointer_rtx, offset, i + 16); offset += 8; /* If f4 and f6 are call clobbered they are saved due to stdargs and therefore are not frame related. */ ! 
if (!call_really_used_regs[i + 16]) RTX_FRAME_RELATED_P (insn) = 1; } else if (!TARGET_PACKED_STACK) --- 8479,8494 ---- /* Save f4 and f6. */ offset = cfun_frame_layout.f4_offset; ! for (i = F4_REGNUM; i <= F4_REGNUM + 1; i++) { ! if (cfun_fpr_save_p (i)) { ! insn = save_fpr (stack_pointer_rtx, offset, i); offset += 8; /* If f4 and f6 are call clobbered they are saved due to stdargs and therefore are not frame related. */ ! if (!call_really_used_regs[i]) RTX_FRAME_RELATED_P (insn) = 1; } else if (!TARGET_PACKED_STACK) *************** s390_emit_prologue (void) *** 8489,8508 **** offset = (cfun_frame_layout.f8_offset + (cfun_frame_layout.high_fprs - 1) * 8); ! for (i = 15; i > 7 && offset >= 0; i--) ! if (cfun_fpr_bit_p (i)) { ! insn = save_fpr (stack_pointer_rtx, offset, i + 16); RTX_FRAME_RELATED_P (insn) = 1; offset -= 8; } if (offset >= cfun_frame_layout.f8_offset) ! next_fpr = i + 16; } if (!TARGET_PACKED_STACK) ! next_fpr = cfun_save_high_fprs_p ? 31 : 0; if (flag_stack_usage_info) current_function_static_stack_size = cfun_frame_layout.frame_size; --- 8502,8521 ---- offset = (cfun_frame_layout.f8_offset + (cfun_frame_layout.high_fprs - 1) * 8); ! for (i = F15_REGNUM; i >= F8_REGNUM && offset >= 0; i--) ! if (cfun_fpr_save_p (i)) { ! insn = save_fpr (stack_pointer_rtx, offset, i); RTX_FRAME_RELATED_P (insn) = 1; offset -= 8; } if (offset >= cfun_frame_layout.f8_offset) ! next_fpr = i; } if (!TARGET_PACKED_STACK) ! next_fpr = cfun_save_high_fprs_p ? F15_REGNUM : 0; if (flag_stack_usage_info) current_function_static_stack_size = cfun_frame_layout.frame_size; *************** s390_emit_prologue (void) *** 8647,8654 **** offset = 0; ! for (i = 24; i <= next_fpr; i++) ! if (cfun_fpr_bit_p (i - 16)) { rtx addr = plus_constant (Pmode, stack_pointer_rtx, cfun_frame_layout.frame_size --- 8660,8667 ---- offset = 0; ! for (i = F8_REGNUM; i <= next_fpr; i++) ! 
if (cfun_fpr_save_p (i)) { rtx addr = plus_constant (Pmode, stack_pointer_rtx, cfun_frame_layout.frame_size *************** s390_emit_epilogue (bool sibcall) *** 8777,8785 **** if (cfun_save_high_fprs_p) { next_offset = cfun_frame_layout.f8_offset; ! for (i = 24; i < 32; i++) { ! if (cfun_fpr_bit_p (i - 16)) { restore_fpr (frame_pointer, offset + next_offset, i); --- 8790,8798 ---- if (cfun_save_high_fprs_p) { next_offset = cfun_frame_layout.f8_offset; ! for (i = F8_REGNUM; i <= F15_REGNUM; i++) { ! if (cfun_fpr_save_p (i)) { restore_fpr (frame_pointer, offset + next_offset, i); *************** s390_emit_epilogue (bool sibcall) *** 8795,8803 **** else { next_offset = cfun_frame_layout.f4_offset; ! for (i = 18; i < 20; i++) { ! if (cfun_fpr_bit_p (i - 16)) { restore_fpr (frame_pointer, offset + next_offset, i); --- 8808,8817 ---- else { next_offset = cfun_frame_layout.f4_offset; ! /* f4, f6 */ ! for (i = F4_REGNUM; i <= F4_REGNUM + 1; i++) { ! if (cfun_fpr_save_p (i)) { restore_fpr (frame_pointer, offset + next_offset, i); *************** s390_conditional_register_usage (void) *** 10504,10521 **** } if (TARGET_64BIT) { ! for (i = 24; i < 32; i++) call_used_regs[i] = call_really_used_regs[i] = 0; } else { ! for (i = 18; i < 20; i++) ! call_used_regs[i] = call_really_used_regs[i] = 0; } if (TARGET_SOFT_FLOAT) { ! for (i = 16; i < 32; i++) call_used_regs[i] = fixed_regs[i] = 1; } } --- 10518,10535 ---- } if (TARGET_64BIT) { ! for (i = F8_REGNUM; i <= F15_REGNUM; i++) call_used_regs[i] = call_really_used_regs[i] = 0; } else { ! call_used_regs[F4_REGNUM] = call_really_used_regs[F4_REGNUM] = 0; ! call_used_regs[F6_REGNUM] = call_really_used_regs[F6_REGNUM] = 0; } if (TARGET_SOFT_FLOAT) { ! 
for (i = F0_REGNUM; i <= F15_REGNUM; i++) call_used_regs[i] = fixed_regs[i] = 1; } } Index: gcc/config/s390/s390.h =================================================================== *** gcc/config/s390/s390.h.orig --- gcc/config/s390/s390.h *************** enum reg_class *** 477,482 **** --- 477,499 ---- { 0xffffffff, 0x0000003f }, /* ALL_REGS */ \ } + #define F0_REGNUM 16 + #define F1_REGNUM 20 + #define F2_REGNUM 17 + #define F3_REGNUM 21 + #define F4_REGNUM 18 + #define F5_REGNUM 22 + #define F6_REGNUM 19 + #define F7_REGNUM 23 + #define F8_REGNUM 24 + #define F9_REGNUM 25 + #define F10_REGNUM 26 + #define F11_REGNUM 27 + #define F12_REGNUM 28 + #define F13_REGNUM 29 + #define F14_REGNUM 30 + #define F15_REGNUM 31 + /* In some case register allocation order is not enough for IRA to generate a good code. The following macro (if defined) increases cost of REGNO for a pseudo approximately by pseudo usage frequency