This patch adds two new files to support the vector cost model and
modifies the Makefile fragment to build the cost model C++ file. Due to
its large size, the patch is provided as an attachment.
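To make the structure of the new files easier to review: riscv-vector-cost.h
defines a vector_insn_scale_table of per-operation scale factors and a
vector_insn_cost_table that wraps one such table and answers get_cost queries
for an rtx. Below is a minimal sketch of the intended usage; the scale values
are placeholders, not taken from the patch, and the helper function name is
hypothetical (it assumes the usual GCC includes plus riscv-vector-cost.h):

/* Illustrative scale factors only; a real table would carry tuned values
   for a particular core.  */
static const vector_insn_scale_table generic_vector_insn_scale_table = {
  1, /* load */
  1, /* store */
  1, /* alu */
  2, /* mult */
  1, /* mov */
  1, /* dup */
  2, /* extract */
  1, /* if_then_else */
};

static const vector_insn_cost_table
  generic_vector_insn_cost_table (&generic_vector_insn_scale_table);

/* Hypothetical helper showing how a target rtx cost hook could consult the
   table and fall back to a default when get_cost does not fully cost X.  */
static int
riscv_example_vector_rtx_cost (rtx x, machine_mode mode, bool speed)
{
  int cost;
  if (generic_vector_insn_cost_table.get_cost (x, mode, &cost, speed))
    return cost;
  return COSTS_N_INSNS (1);
}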
gcc/ChangeLog:
* config.gcc: Add riscv-vector-cost.o to extra_objs.
* config/riscv/riscv-vector-cost.cc: New file for the RISC-V vector
cost model.
* config/riscv/riscv-vector-cost.h: New file for the RISC-V vector
cost model.
* config/riscv/t-riscv: Add make rule for riscv-vector-cost.o.
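The patch itself only defines the riscv_vector_costs class; hooking it into
the vectorizer is not part of this patch. For context, the expected wiring,
following how other ports use the vector_costs machinery, would presumably be
a TARGET_VECTORIZE_CREATE_COSTS implementation in riscv.cc along these lines
(sketch only, not included here):

/* Sketch, assuming a later patch in the series adds this to riscv.cc.  */
static vector_costs *
riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
{
  return new riscv_vector_costs (vinfo, costing_for_scalar);
}

#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs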
From eb995818cd5f77f85e8df93b690b00ce1fd1aa35 Mon Sep 17 00:00:00 2001
From: Michael Collison <colli...@rivosinc.com>
Date: Thu, 2 Mar 2023 12:27:36 -0500
Subject: [PATCH] Autovectorization patch set 2
---
gcc/config.gcc | 2 +-
gcc/config/riscv/riscv-vector-cost.cc | 620 ++++++++++++++++++++++++++
gcc/config/riscv/riscv-vector-cost.h | 400 +++++++++++++++++
gcc/config/riscv/t-riscv | 5 +
4 files changed, 1026 insertions(+), 1 deletion(-)
create mode 100644 gcc/config/riscv/riscv-vector-cost.cc
create mode 100644 gcc/config/riscv/riscv-vector-cost.h
diff --git a/gcc/config.gcc b/gcc/config.gcc
index c070e6ecd2e..a4017777187 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -530,7 +530,7 @@ pru-*-*)
riscv*)
cpu_type=riscv
extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o"
- extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
+ extra_objs="${extra_objs} riscv-vector-cost.o riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
d_target_objs="riscv-d.o"
extra_headers="riscv_vector.h"
target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins.cc"
diff --git a/gcc/config/riscv/riscv-vector-cost.cc b/gcc/config/riscv/riscv-vector-cost.cc
new file mode 100644
index 00000000000..5a33b20843a
--- /dev/null
+++ b/gcc/config/riscv/riscv-vector-cost.cc
@@ -0,0 +1,620 @@
+/* Cost model implementation for RISC-V 'V' Extension for GNU compiler.
+ Copyright (C) 2022-2023 Free Software Foundation, Inc.
+ Contributed by Juzhe Zhong (juzhe.zh...@rivai.ai), RiVAI Technologies Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define INCLUDE_STRING
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "backend.h"
+#include "rtl.h"
+#include "regs.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "rtlanal.h"
+#include "output.h"
+#include "alias.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "varasm.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "function.h"
+#include "explow.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "target.h"
+#include "basic-block.h"
+#include "expr.h"
+#include "optabs.h"
+#include "bitmap.h"
+#include "df.h"
+#include "diagnostic.h"
+#include "builtins.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "opts.h"
+#include "langhooks.h"
+#include "rtl-iter.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-loop-niter.h"
+#include "riscv-vector-builtins.h"
+
+#include "riscv-vector-cost.h"
+/* This file should be included last. */
+#include "target-def.h"
+
+bool vector_insn_cost_table::get_cost(rtx x, machine_mode mode, int *cost,
+ bool speed) const {
+ rtx op0, op1, op2;
+ enum rtx_code code = GET_CODE(x);
+ scalar_int_mode int_mode;
+
+ /* By default, assume that everything has equivalent cost to the
+ cheapest instruction. Any additional costs are applied as a delta
+ above this default. */
+ *cost = COSTS_N_INSNS(1);
+
+ switch (code) {
+ case SET:
+ /* The cost depends entirely on the operands to SET. */
+ *cost = 0;
+ op0 = SET_DEST(x);
+ op1 = SET_SRC(x);
+
+ switch (GET_CODE(op0)) {
+ case MEM:
+ if (speed) {
+ *cost += store->cost(x, mode);
+ }
+
+ //*cost += rtx_cost(op1, mode, SET, 1, speed);
+ return true;
+
+ case SUBREG:
+ if (!REG_P(SUBREG_REG(op0)))
+ *cost += rtx_cost(SUBREG_REG(op0), VOIDmode, SET, 0, speed);
+
+ /* Fall through. */
+ case REG:
+ /* The cost is one per vector-register copied. */
+ if (VECTOR_MODE_P(GET_MODE(op0))) {
+ *cost = mov->cost(x, mode);
+ } else
+ /* Cost is just the cost of the RHS of the set. */
+ *cost += rtx_cost(op1, mode, SET, 1, speed);
+ return true;
+
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ /* Bit-field insertion. Strip any redundant widening of
+ the RHS to meet the width of the target. */
+ if (SUBREG_P(op1))
+ op1 = SUBREG_REG(op1);
+ if ((GET_CODE(op1) == ZERO_EXTEND || GET_CODE(op1) == SIGN_EXTEND) &&
+ CONST_INT_P(XEXP(op0, 1)) &&
+ is_a<scalar_int_mode>(GET_MODE(XEXP(op1, 0)), &int_mode) &&
+ GET_MODE_BITSIZE(int_mode) >= INTVAL(XEXP(op0, 1)))
+ op1 = XEXP(op1, 0);
+
+ if (CONST_INT_P(op1)) {
+ /* MOV immediate is assumed to always be cheap. */
+ *cost = COSTS_N_INSNS(1);
+ } else {
+ /* BFM. */
+ if (speed)
+ *cost += alu->cost(x, mode);
+ *cost += rtx_cost(op1, VOIDmode, (enum rtx_code)code, 1, speed);
+ }
+
+ return true;
+
+ default:
+ /* We can't make sense of this, assume default cost. */
+ *cost = COSTS_N_INSNS(1);
+ return false;
+ }
+ return false;
+
+ case MEM:
+ if (speed) {
+ *cost += load->cost(x, mode);
+ }
+
+ return true;
+
+ case NEG:
+ op0 = XEXP(x, 0);
+
+ if (speed) {
+ /* FNEG. */
+ *cost += alu->cost(x, mode);
+ }
+ return false;
+
+ if (GET_MODE_CLASS(mode) == MODE_INT) {
+ if (GET_RTX_CLASS(GET_CODE(op0)) == RTX_COMPARE ||
+ GET_RTX_CLASS(GET_CODE(op0)) == RTX_COMM_COMPARE) {
+ /* CSETM. */
+ *cost += rtx_cost(XEXP(op0, 0), VOIDmode, NEG, 0, speed);
+ return true;
+ }
+
+ /* Cost this as SUB wzr, X. */
+ op0 = CONST0_RTX(mode);
+ op1 = XEXP(x, 0);
+ goto cost_minus;
+ }
+ return false;
+
+ case COMPARE:
+ op0 = XEXP(x, 0);
+ op1 = XEXP(x, 1);
+
+ if (op1 == const0_rtx && GET_CODE(op0) == AND) {
+ x = op0;
+ mode = GET_MODE(op0);
+ goto cost_logic;
+ }
+
+ if (GET_MODE_CLASS(GET_MODE(op0)) == MODE_INT) {
+ /* TODO: A write to the CC flags possibly costs extra, this
+ needs encoding in the cost tables. */
+
+ mode = GET_MODE(op0);
+ /* ANDS. */
+ if (GET_CODE(op0) == AND) {
+ x = op0;
+ goto cost_logic;
+ }
+
+ if (GET_CODE(op0) == PLUS) {
+ /* ADDS (and CMN alias). */
+ x = op0;
+ goto cost_plus;
+ }
+
+ if (GET_CODE(op0) == MINUS) {
+ /* SUBS. */
+ x = op0;
+ goto cost_minus;
+ }
+
+ if (GET_CODE(op0) == ZERO_EXTRACT && op1 == const0_rtx &&
+ CONST_INT_P(XEXP(op0, 1)) && CONST_INT_P(XEXP(op0, 2))) {
+ /* COMPARE of ZERO_EXTRACT form of TST-immediate.
+ Handle it here directly rather than going to cost_logic
+ since we know the immediate generated for the TST is valid
+ so we can avoid creating an intermediate rtx for it only
+ for costing purposes. */
+ if (speed)
+ *cost += alu->cost(x, mode);
+
+ *cost += rtx_cost(XEXP(op0, 0), GET_MODE(op0), ZERO_EXTRACT, 0, speed);
+ return true;
+ }
+
+ if (GET_CODE(op1) == NEG) {
+ /* CMN. */
+ if (speed)
+ *cost += alu->cost(x, mode);
+
+ *cost += rtx_cost(op0, mode, COMPARE, 0, speed);
+ *cost += rtx_cost(XEXP(op1, 0), mode, NEG, 1, speed);
+ return true;
+ }
+
+ /* CMP.
+
+ Compare can freely swap the order of operands, and
+ canonicalization puts the more complex operation first.
+ But the integer MINUS logic expects the shift/extend
+ operation in op1. */
+ if (!(REG_P(op0) || (SUBREG_P(op0) && REG_P(SUBREG_REG(op0))))) {
+ op0 = XEXP(x, 1);
+ op1 = XEXP(x, 0);
+ }
+ goto cost_minus;
+ }
+
+ if (VECTOR_MODE_P(mode)) {
+ /* Vector compare. */
+ if (speed)
+ *cost += alu->cost(x, mode);
+
+ return false;
+ }
+ return false;
+
+ case MINUS: {
+ op0 = XEXP(x, 0);
+ op1 = XEXP(x, 1);
+
+ cost_minus:
+ *cost += rtx_cost(op0, mode, MINUS, 0, speed);
+
+ return true;
+ }
+
+ case PLUS: {
+ op0 = XEXP(x, 0);
+ op1 = XEXP(x, 1);
+
+ cost_plus:
+ if (GET_RTX_CLASS(GET_CODE(op0)) == RTX_COMPARE ||
+ GET_RTX_CLASS(GET_CODE(op0)) == RTX_COMM_COMPARE) {
+ /* CSINC. */
+ *cost += rtx_cost(XEXP(op0, 0), mode, PLUS, 0, speed);
+ *cost += rtx_cost(op1, mode, PLUS, 1, speed);
+ return true;
+ }
+
+ *cost += rtx_cost(op1, mode, PLUS, 1, speed);
+
+ return true;
+ }
+
+ case BSWAP:
+ *cost = COSTS_N_INSNS(1);
+
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+ return false;
+
+ case IOR:
+ *cost = COSTS_N_INSNS(1);
+
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+ return true;
+
+ case XOR:
+ case AND:
+ cost_logic:
+ if (speed)
+ *cost += alu->cost(x, mode);
+ return true;
+
+ case NOT:
+ *cost += alu->cost(x, mode);
+ return false;
+
+ case ZERO_EXTEND:
+
+ op0 = XEXP(x, 0);
+ /* If a value is written in SI mode, then zero extended to DI
+ mode, the operation will in general be free as a write to
+ a 'w' register implicitly zeroes the upper bits of an 'x'
+ register. However, if this is
+
+ (set (reg) (zero_extend (reg)))
+
+ we must cost the explicit register move. */
+ if (mode == DImode && GET_MODE(op0) == SImode) {
+ int op_cost = rtx_cost(op0, VOIDmode, ZERO_EXTEND, 0, speed);
+
+ /* If OP_COST is non-zero, then the cost of the zero extend
+ is effectively the cost of the inner operation. Otherwise
+ we have a MOV instruction and we take the cost from the MOV
+ itself. This is true independently of whether we are
+ optimizing for space or time. */
+ if (op_cost)
+ *cost = op_cost;
+
+ return true;
+ } else if (MEM_P(op0)) {
+ /* All loads can zero extend to any size for free. */
+ *cost = rtx_cost(op0, VOIDmode, ZERO_EXTEND, 0, speed);
+ return true;
+ }
+
+ if (speed) {
+ /* UMOV. */
+ *cost += alu->cost(x, mode);
+ }
+ return false;
+
+ case SIGN_EXTEND:
+ if (MEM_P(XEXP(x, 0))) {
+ if (speed) {
+ *cost += load->cost(x, mode);
+ }
+ return true;
+ }
+
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+ return false;
+
+ case ASHIFT:
+ op0 = XEXP(x, 0);
+ op1 = XEXP(x, 1);
+
+ if (CONST_INT_P(op1)) {
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+
+ /* We can incorporate zero/sign extend for free. */
+ if (GET_CODE(op0) == ZERO_EXTEND || GET_CODE(op0) == SIGN_EXTEND)
+ op0 = XEXP(op0, 0);
+
+ *cost += rtx_cost(op0, VOIDmode, ASHIFT, 0, speed);
+ return true;
+ } else {
+ if (speed)
+ /* Vector shift (register). */
+ *cost += alu->cost(x, mode);
+ return false; /* All arguments need to be in registers. */
+ }
+
+ case ROTATE:
+ case ROTATERT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ op0 = XEXP(x, 0);
+ op1 = XEXP(x, 1);
+
+ if (CONST_INT_P(op1)) {
+ /* ASR (immediate) and friends. */
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+
+ *cost += rtx_cost(op0, mode, (enum rtx_code)code, 0, speed);
+ return true;
+ } else {
+ if (VECTOR_MODE_P(mode)) {
+ if (speed)
+ /* Vector shift (register). */
+ *cost += alu->cost(x, mode);
+ }
+ return false; /* All arguments need to be in registers. */
+ }
+
+ case SYMBOL_REF:
+ return true;
+
+ case HIGH:
+ case LO_SUM:
+ /* ADRP/ADD (immediate). */
+ if (speed)
+ *cost += alu->cost(x, mode);
+ return true;
+
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ /* UBFX/SBFX. */
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+
+ /* We can trust that the immediates used will be correct (there
+ are no by-register forms), so we need only cost op0. */
+ *cost += rtx_cost(XEXP(x, 0), VOIDmode, (enum rtx_code)code, 0, speed);
+ return true;
+
+ case MULT:
+ *cost += mult->cost(x, mode);
+ return true;
+
+ case MOD:
+ case UMOD:
+ if (speed) {
+ /* Slightly prefer UMOD over SMOD. */
+ *cost += alu->cost(x, mode);
+ }
+ return false; /* All arguments need to be in registers. */
+
+ case DIV:
+ case UDIV:
+ case SQRT:
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+ return false; /* All arguments need to be in registers. */
+
+ case IF_THEN_ELSE:
+ if (speed) {
+ *cost += if_then_else->cost(x, mode);
+ }
+ return true;
+
+ case EQ:
+ case NE:
+ case GT:
+ case GTU:
+ case LT:
+ case LTU:
+ case GE:
+ case GEU:
+ case LE:
+ case LEU:
+
+ return false; /* All arguments must be in registers. */
+
+ case FMA:
+ op0 = XEXP(x, 0);
+ op1 = XEXP(x, 1);
+ op2 = XEXP(x, 2);
+
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+
+ /* FMSUB, FNMADD, and FNMSUB are free. */
+ if (GET_CODE(op0) == NEG)
+ op0 = XEXP(op0, 0);
+
+ if (GET_CODE(op2) == NEG)
+ op2 = XEXP(op2, 0);
+
+ /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
+ and the by-element operand as operand 0. */
+ if (GET_CODE(op1) == NEG)
+ op1 = XEXP(op1, 0);
+
+ /* Catch vector-by-element operations. The by-element operand can
+ either be (vec_duplicate (vec_select (x))) or just
+ (vec_select (x)), depending on whether we are multiplying by
+ a vector or a scalar.
+
+ Canonicalization is not very good in these cases, FMA4 will put the
+ by-element operand as operand 0, FNMA4 will have it as operand 1. */
+ if (GET_CODE(op0) == VEC_DUPLICATE)
+ op0 = XEXP(op0, 0);
+ else if (GET_CODE(op1) == VEC_DUPLICATE)
+ op1 = XEXP(op1, 0);
+
+ if (GET_CODE(op0) == VEC_SELECT)
+ op0 = XEXP(op0, 0);
+ else if (GET_CODE(op1) == VEC_SELECT)
+ op1 = XEXP(op1, 0);
+
+ /* If the remaining parameters are not registers,
+ get the cost to put them into registers. */
+ *cost += rtx_cost(op0, mode, FMA, 0, speed);
+ *cost += rtx_cost(op1, mode, FMA, 1, speed);
+ *cost += rtx_cost(op2, mode, FMA, 2, speed);
+ return true;
+
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ return false;
+
+ case FLOAT_EXTEND:
+ if (speed) {
+ /* Vector float extend. */
+ *cost += alu->cost(x, mode);
+ }
+ return false;
+
+ case FLOAT_TRUNCATE:
+ if (speed) {
+ /* Vector float truncate. */
+ *cost += alu->cost(x, mode);
+ }
+ return false;
+
+ case FIX:
+ case UNSIGNED_FIX:
+ x = XEXP(x, 0);
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+
+ *cost += rtx_cost(x, VOIDmode, (enum rtx_code)code, 0, speed);
+ return true;
+
+ case ABS:
+ /* ABS (vector). */
+ if (speed)
+ *cost += alu->cost(x, mode);
+ return false;
+
+ case SMAX:
+ case SMIN:
+ if (speed) {
+ *cost += alu->cost(x, mode);
+ }
+ return false;
+
+ case UNSPEC:
+ break;
+
+ case TRUNCATE:
+ break;
+ case CONST_VECTOR: {
+ *cost = mov->cost(x, mode);
+ break;
+ }
+ case VEC_CONCAT:
+ /* Depending on the operation, either DUP or INS.
+ For now, keep default costing. */
+ break;
+ case VEC_DUPLICATE:
+ /* Load using a DUP. */
+ *cost = dup->cost(x, mode);
+ return false;
+ case VEC_SELECT: {
+ rtx op0 = XEXP(x, 0);
+ *cost = rtx_cost(op0, GET_MODE(op0), VEC_SELECT, 0, speed);
+
+ /* Cost subreg of 0 as free, otherwise as DUP. */
+ rtx op1 = XEXP(x, 1);
+ if (vec_series_lowpart_p(mode, GET_MODE(op1), op1))
+ ;
+ else if (vec_series_highpart_p(mode, GET_MODE(op1), op1))
+ *cost = dup->cost(x, mode);
+ else
+ *cost = extract->cost(x, mode);
+ return true;
+ }
+ default:
+ break;
+ }
+
+ if (dump_file)
+ fprintf(dump_file, "\nFailed to cost RTX. Assuming default cost.\n");
+
+ return true;
+}
+
+extern int riscv_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
+
+riscv_vector_costs::riscv_vector_costs(vec_info *vinfo, bool costing_for_scalar)
+ : vector_costs(vinfo, costing_for_scalar) {}
+
+unsigned riscv_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
+ stmt_vec_info stmt_info, slp_tree,
+ tree vectype, int misalign,
+ vect_cost_model_location where) {
+ int stmt_cost
+ = riscv_builtin_vectorization_cost (kind, vectype, misalign);
+ return record_stmt_cost(stmt_info, where, count * stmt_cost);
+}
+
+void riscv_vector_costs::finish_cost(const vector_costs *uncast_scalar_costs) {
+ auto *scalar_costs =
+ static_cast<const riscv_vector_costs *>(uncast_scalar_costs);
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info>(m_vinfo);
+ if (loop_vinfo)
+ m_costs[vect_body] = 1;
+ vector_costs::finish_cost(scalar_costs);
+}
+
+bool riscv_vector_costs::better_main_loop_than_p(
+ const vector_costs *uncast_other) const {
+ auto other = static_cast<const riscv_vector_costs *>(uncast_other);
+
+ return vector_costs::better_main_loop_than_p(other);
+}
diff --git a/gcc/config/riscv/riscv-vector-cost.h b/gcc/config/riscv/riscv-vector-cost.h
new file mode 100644
index 00000000000..ef398915a18
--- /dev/null
+++ b/gcc/config/riscv/riscv-vector-cost.h
@@ -0,0 +1,400 @@
+/* Cost model definitions for RISC-V 'V' Extension for GNU compiler.
+ Copyright (C) 2022-2023 Free Software Foundation, Inc.
+ Contributed by Juzhe Zhong (juzhe.zh...@rivai.ai), RiVAI Technologies Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_RISCV_VECTOR_COST_H
+#define GCC_RISCV_VECTOR_COST_H
+
+enum vector_tune_type {
+ VECTOR_TUNE_GENERIC,
+};
+
+struct vector_insn_scale_table {
+ const int load;
+ const int store;
+ const int alu;
+ const int mult;
+ const int mov;
+ const int dup;
+ const int extract;
+ const int if_then_else;
+};
+
+struct vector_stmt_scale_table {
+ const int scalar_int_stmt_cost; /* Cost of any int scalar operation,
+ excluding load and store. */
+ const int scalar_fp_stmt_cost; /* Cost of any fp scalar operation,
+ excluding load and store. */
+ const int scalar_load_cost; /* Cost of scalar load. */
+ const int scalar_store_cost; /* Cost of scalar store. */
+ const int vec_int_stmt_cost; /* Cost of any int vector operation,
+ excluding load, store, permute,
+ vector-to-scalar and
+ scalar-to-vector operation. */
+ const int vec_fp_stmt_cost; /* Cost of any fp vector operation,
+ excluding load, store, permute,
+ vector-to-scalar and
+ scalar-to-vector operation. */
+ const int vec_permute_cost; /* Cost of permute operation. */
+ const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */
+ const int scalar_to_vec_cost; /* Cost of scalar-to-vector
+ operation. */
+ const int vec_align_load_cost; /* Cost of aligned vector load. */
+ const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
+ const int vec_unalign_store_cost; /* Cost of unaligned vector store. */
+ const int vec_store_cost; /* Cost of vector store. */
+ const int cond_taken_branch_cost; /* Cost of taken branch. */
+ const int cond_not_taken_branch_cost; /* Cost of not taken branch. */
+};
+
+/* Information about vector code that we're in the process of costing. */
+class riscv_vector_costs : public vector_costs {
+public:
+ riscv_vector_costs(vec_info *, bool);
+
+ unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
+ stmt_vec_info stmt_info, slp_tree, tree vectype,
+ int misalign,
+ vect_cost_model_location where) override;
+ void finish_cost(const vector_costs *) override;
+ bool better_main_loop_than_p(const vector_costs *other) const override;
+};
+
+template <typename T> class vector_insn_cost {
+public:
+ vector_insn_cost(const T *_scale_table) : m_scale_table(_scale_table) {}
+ ~vector_insn_cost() {}
+
+ virtual int scale(RTX_CODE) const { return 1; }
+
+ virtual unsigned cost(rtx x, machine_mode mode) const {
+ return riscv_vector::riscv_classify_nf(mode) * riscv_vector::riscv_vlmul_regsize(mode) *
+ scale(x == NULL_RTX ? UNKNOWN : GET_CODE(x));
+ }
+
+protected:
+ const T *m_scale_table;
+};
+
+template <typename T> class vector_cost_table {
+public:
+ vector_cost_table(const T *) {}
+ ~vector_cost_table() {}
+
+ virtual bool get_cost(rtx, machine_mode, int *, bool) const { return true; }
+};
+
+class vector_alu_cost : public vector_insn_cost<vector_insn_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override { return m_scale_table->alu; }
+};
+
+class vector_load_cost : public vector_insn_cost<vector_insn_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override { return m_scale_table->load; }
+};
+
+class vector_store_cost : public vector_insn_cost<vector_insn_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override { return m_scale_table->store; }
+};
+
+class vector_mult_cost : public vector_insn_cost<vector_insn_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override { return m_scale_table->mult; }
+};
+
+class vector_mov_cost : public vector_insn_cost<vector_insn_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override { return m_scale_table->mov; }
+};
+
+class vector_dup_cost : public vector_insn_cost<vector_insn_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override { return m_scale_table->dup; }
+};
+
+class vector_extract_cost : public vector_insn_cost<vector_insn_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override { return m_scale_table->extract; }
+};
+
+class vector_if_then_else_cost
+ : public vector_insn_cost<vector_insn_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->if_then_else;
+ }
+};
+
+class vector_insn_cost_table
+ : public vector_cost_table<vector_insn_scale_table> {
+public:
+ vector_insn_cost_table(const vector_insn_scale_table *_scale_table)
+ : vector_cost_table(_scale_table) {
+ load = new vector_load_cost(_scale_table);
+ store = new vector_store_cost(_scale_table);
+ alu = new vector_alu_cost(_scale_table);
+ mult = new vector_mult_cost(_scale_table);
+ mov = new vector_mov_cost(_scale_table);
+ dup = new vector_dup_cost(_scale_table);
+ extract = new vector_extract_cost(_scale_table);
+ if_then_else = new vector_if_then_else_cost(_scale_table);
+ }
+
+ bool get_cost(rtx, machine_mode, int *, bool) const override;
+
+public:
+ const vector_insn_cost<vector_insn_scale_table> *load;
+ const vector_insn_cost<vector_insn_scale_table> *store;
+ const vector_insn_cost<vector_insn_scale_table> *alu;
+ const vector_insn_cost<vector_insn_scale_table> *mult;
+ const vector_insn_cost<vector_insn_scale_table> *mov;
+ const vector_insn_cost<vector_insn_scale_table> *dup;
+ const vector_insn_cost<vector_insn_scale_table> *extract;
+ const vector_insn_cost<vector_insn_scale_table> *if_then_else;
+};
+
+// ==================== vector stmt cost=========================
+class vector_scalar_int_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->scalar_int_stmt_cost;
+ }
+};
+
+class vector_scalar_fp_cost : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->scalar_fp_stmt_cost;
+ }
+};
+
+class vector_scalar_load_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->scalar_load_cost;
+ }
+};
+
+class vector_scalar_store_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->scalar_store_cost;
+ }
+};
+
+class vector_vec_int_cost : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->vec_int_stmt_cost;
+ }
+};
+
+class vector_vec_fp_cost : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->vec_fp_stmt_cost;
+ }
+};
+
+class vector_vec_permute_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->vec_permute_cost;
+ }
+};
+
+class vector_vec_to_scalar_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->vec_to_scalar_cost;
+ }
+};
+
+class vector_scalar_to_vec_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->scalar_to_vec_cost;
+ }
+};
+
+class vector_vec_align_load_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->vec_align_load_cost;
+ }
+};
+
+class vector_vec_unalign_load_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->vec_unalign_load_cost;
+ }
+};
+
+class vector_vec_unalign_store_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->vec_unalign_store_cost;
+ }
+};
+
+class vector_vec_store_cost : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->vec_store_cost;
+ }
+};
+
+class vector_cond_taken_branch_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->cond_taken_branch_cost;
+ }
+};
+
+class vector_cond_not_taken_branch_cost
+ : public vector_insn_cost<vector_stmt_scale_table> {
+public:
+ // Inherit the constructors from vector_insn_cost.
+ using vector_insn_cost::vector_insn_cost;
+
+ int scale(RTX_CODE) const override {
+ return m_scale_table->cond_not_taken_branch_cost;
+ }
+};
+
+class vector_stmt_cost_table
+ : public vector_cost_table<vector_stmt_scale_table> {
+public:
+ vector_stmt_cost_table(const vector_stmt_scale_table *_scale_table)
+ : vector_cost_table(_scale_table) {
+ scalar_int = new vector_scalar_int_cost(_scale_table);
+ scalar_fp = new vector_scalar_fp_cost(_scale_table);
+ scalar_load = new vector_scalar_load_cost(_scale_table);
+ scalar_store = new vector_scalar_store_cost(_scale_table);
+ vec_int = new vector_vec_int_cost(_scale_table);
+ vec_fp = new vector_vec_fp_cost(_scale_table);
+ vec_permute = new vector_vec_permute_cost(_scale_table);
+ vec_to_scalar = new vector_vec_to_scalar_cost(_scale_table);
+ scalar_to_vec = new vector_scalar_to_vec_cost(_scale_table);
+ vec_align_load = new vector_vec_align_load_cost(_scale_table);
+ vec_unalign_load = new vector_vec_unalign_load_cost(_scale_table);
+ vec_unalign_store = new vector_vec_unalign_store_cost(_scale_table);
+ vec_store = new vector_vec_store_cost(_scale_table);
+ cond_taken_branch = new vector_cond_taken_branch_cost(_scale_table);
+ cond_not_taken_branch = new vector_cond_not_taken_branch_cost(_scale_table);
+ }
+
+public:
+ const vector_insn_cost<vector_stmt_scale_table> *scalar_int;
+ const vector_insn_cost<vector_stmt_scale_table> *scalar_fp;
+ const vector_insn_cost<vector_stmt_scale_table> *scalar_load;
+ const vector_insn_cost<vector_stmt_scale_table> *scalar_store;
+ const vector_insn_cost<vector_stmt_scale_table> *vec_int;
+ const vector_insn_cost<vector_stmt_scale_table> *vec_fp;
+ const vector_insn_cost<vector_stmt_scale_table> *vec_permute;
+ const vector_insn_cost<vector_stmt_scale_table> *vec_to_scalar;
+ const vector_insn_cost<vector_stmt_scale_table> *scalar_to_vec;
+ const vector_insn_cost<vector_stmt_scale_table> *vec_align_load;
+ const vector_insn_cost<vector_stmt_scale_table> *vec_unalign_load;
+ const vector_insn_cost<vector_stmt_scale_table> *vec_unalign_store;
+ const vector_insn_cost<vector_stmt_scale_table> *vec_store;
+ const vector_insn_cost<vector_stmt_scale_table> *cond_taken_branch;
+ const vector_insn_cost<vector_stmt_scale_table> *cond_not_taken_branch;
+};
+
+#endif // GCC_RISCV_VECTOR_COST_H
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index d30e0235356..095169741bb 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -51,6 +51,11 @@ riscv-c.o: $(srcdir)/config/riscv/riscv-c.cc $(CONFIG_H) $(SYSTEM_H) \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-c.cc
+riscv-vector-cost.o: $(srcdir)/config/riscv/riscv-vector-cost.cc $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/riscv/riscv-vector-cost.cc
+
riscv-vsetvl.o: $(srcdir)/config/riscv/riscv-vsetvl.cc \
$(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \
$(TARGET_H) tree-pass.h df.h rtl-ssa.h cfgcleanup.h insn-config.h \
--
2.34.1