Re: [PATCH v2] [aarch64] Add HiSilicon tsv110 CPU support

Kyrill Tkachov Thu, 21 Jun 2018 05:57:02 -0700

Hi Shaokun,

On 21/06/18 12:07, Zhangshaokun wrote:

Hi Kyrill,


It was the Dragon Boat Festival for a short holiday in China, sorry to
reply later.

On 2018/6/14 15:58, Kyrill Tkachov wrote:

Hi Shaokun,

On 14/06/18 02:09, Shaokun Zhang wrote:

This patch adds HiSilicon's an mcpu: tsv110, which supports v8_4A.

---
   gcc/ChangeLog                            |   8 +++
   gcc/config/aarch64/aarch64-cores.def     |   3 +
   gcc/config/aarch64/aarch64-cost-tables.h | 103 
+++++++++++++++++++++++++++++++
   gcc/config/aarch64/aarch64-tune.md       |   2 +-
   gcc/config/aarch64/aarch64.c             |  80 +++++++++++++++++++++++-
   gcc/doc/invoke.texi                      |   2 +-
   6 files changed, 195 insertions(+), 3 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9c90875..e376714 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2018-06-12  Shaokun Zhang  <zhangshao...@hisilicon.com>
+            Bo Zhou  <zbo.z...@hisilicon.com>
+    * config/aarch64/aarch64-cores.def (tsv110): New CPU.
+    * config/aarch64/aarch64-tune.md: Regenerated.
+    * doc/invoke.texi (AArch64 Options/-mtune): Add "tsv110".
+    * config/aarch64/aarch64.c (tsv110_tunings): New tuning table.
+    * config/aarch64/aarch64-cost-tables.h: Add "tsv110" extra costs.
+

Can you confirm that you've run a bootstrap and test run with this patch
to check there are no regressions?

I have tested this patch (fix some typo) on aarch64 and didn't get any 
regressions.

While, there is issue that is on the master branch:
../.././gcc/bitmap.c: In function ‘unsigned int 
bitmap_last_set_bit(const_bitmap)’:
../.././gcc/bitmap.c:841:26: error: array subscript -1 is below array bounds of 
‘const BITMAP_WORD [2]’ {aka ‘const long unsigned int [2]’} 
[-Werror=array-bounds]
        word = elt->bits[ix];
               ~~~~~~~~~~~~^
cc1plus: all warnings being treated as errors
Makefile:1110: recipe for target 'bitmap.o' failed
make[3]: *** [bitmap.o] Error 1


I don't see that error with the current trunk based off r261832 (today).
Can you make sure the bootstrap passes with your patch on top of the recent 
trunk?

Thanks,
Kyrill

My gcc version is: gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609.
Are you happy to fix it? I fixed it in my local, but I am not sure it is ok.

This version looks good to me but you'll need final approval from the 
maintainers.

I will update patch based on latest branch code today.
Hopefully you and maintainers are happy on v3.

Thanks,
Shaokun.

Thanks,
Kyrill

   2018-06-12  Eric Botcazou  <ebotca...@adacore.com>
         * gcc.c: Document new %@{...} sequence.
diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index e64d831..e6ebf02 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -88,6 +88,9 @@ AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2
     /* ARMv8.4-A Architecture Processors.  */
   +/* HiSilicon ('H') cores. */
+AARCH64_CORE("tsv110",     tsv110,    cortexa57,    8_4A, 
AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | 
AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
+
   /* Qualcomm ('Q') cores. */
   AARCH64_CORE("saphira",     saphira,    falkor,    8_4A,  
AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   0x51, 0xC01, -1)
   diff --git a/gcc/config/aarch64/aarch64-cost-tables.h 
b/gcc/config/aarch64/aarch64-cost-tables.h
index a455c62..b6890d6 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -334,4 +334,107 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
     }
   };
   +const struct cpu_cost_table tsv110_extra_costs =
+{
+  /* ALU */
+  {
+    0,                 /* arith.  */
+    0,                 /* logical.  */
+    0,                 /* shift.  */
+    0,                 /* shift_reg.  */
+    COSTS_N_INSNS (1), /* arith_shift.  */
+    COSTS_N_INSNS (1), /* arith_shift_reg.  */
+    COSTS_N_INSNS (1), /* log_shift.  */
+    COSTS_N_INSNS (1), /* log_shift_reg.  */
+    0,                 /* extend.  */
+    COSTS_N_INSNS (1), /* extend_arith.  */
+    0,                 /* bfi.  */
+    0,                 /* bfx.  */
+    0,                 /* clz.  */
+    0,                   /* rev.  */
+    0,                 /* non_exec.  */
+    true               /* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (2),       /* simple.  */
+      COSTS_N_INSNS (2),       /* flag_setting.  */
+      COSTS_N_INSNS (2),       /* extend.  */
+      COSTS_N_INSNS (2),       /* add.  */
+      COSTS_N_INSNS (2),       /* extend_add.  */
+      COSTS_N_INSNS (11)       /* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      COSTS_N_INSNS (3),       /* simple.  */
+      0,                       /* flag_setting (N/A).  */
+      COSTS_N_INSNS (3),       /* extend.  */
+      COSTS_N_INSNS (3),       /* add.  */
+      COSTS_N_INSNS (3),       /* extend_add.  */
+      COSTS_N_INSNS (19)       /* idiv.  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (3),         /* load.  */
+    COSTS_N_INSNS (4),         /* load_sign_extend.  */
+    COSTS_N_INSNS (3),         /* ldrd.  */
+    COSTS_N_INSNS (3),         /* ldm_1st.  */
+    1,                         /* ldm_regs_per_insn_1st.  */
+    2,                         /* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (4),         /* loadf.  */
+    COSTS_N_INSNS (4),         /* loadd.  */
+    COSTS_N_INSNS (4),         /* load_unaligned.  */
+    0,                         /* store.  */
+    0,                         /* strd.  */
+    0,                         /* stm_1st.  */
+    1,                         /* stm_regs_per_insn_1st.  */
+    2,                         /* stm_regs_per_insn_subsequent.  */
+    0,                         /* storef.  */
+    0,                         /* stored.  */
+    COSTS_N_INSNS (1),         /* store_unaligned.  */
+    COSTS_N_INSNS (4),         /* loadv.  */
+    COSTS_N_INSNS (4)          /* storev.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (10),      /* div.  */
+      COSTS_N_INSNS (4),       /* mult.  */
+      COSTS_N_INSNS (4),       /* mult_addsub.  */
+      COSTS_N_INSNS (4),       /* fma.  */
+      COSTS_N_INSNS (4),       /* addsub.  */
+      COSTS_N_INSNS (1),       /* fpconst.  */
+      COSTS_N_INSNS (1),       /* neg.  */
+      COSTS_N_INSNS (1),       /* compare.  */
+      COSTS_N_INSNS (2),       /* widen.  */
+      COSTS_N_INSNS (2),       /* narrow.  */
+      COSTS_N_INSNS (2),       /* toint.  */
+      COSTS_N_INSNS (1),       /* fromint.  */
+      COSTS_N_INSNS (2)        /* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (17),      /* div.  */
+      COSTS_N_INSNS (4),       /* mult.  */
+      COSTS_N_INSNS (6),       /* mult_addsub.  */
+      COSTS_N_INSNS (6),       /* fma.  */
+      COSTS_N_INSNS (3),       /* addsub.  */
+      COSTS_N_INSNS (1),       /* fpconst.  */
+      COSTS_N_INSTS (1),       /* neg.  */
+      COSTS_N_INSTS (1),       /* compare.  */
+      COSTS_N_INSNS (2),       /* widen.  */
+      COSTS_N_INSNS (2),       /* narrow.  */
+      COSTS_N_INSNS (2),       /* toint.  */
+      COSTS_N_INSNS (1),       /* fromint.  */
+      COSTS_N_INSNS (2)        /* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)  /* alu.  */
+  }
+};
+
   #endif
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index 7b3a746..ccdc13b 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
   ;; -*- buffer-read-only: t -*-
   ;; Generated automatically by gentune.sh from aarch64-cores.def
   (define_attr "tune"
-    
"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
+    
"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
       (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index bd0ac2f..4138db3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -269,6 +269,22 @@ static const struct cpu_addrcost_table 
generic_addrcost_table =
     0 /* imm_offset  */
   };
   +static const struct cpu_addrcost_table tsv110_addrcost_table =
+{
+    {
+      1, /* hi  */
+      0, /* si  */
+      0, /* di  */
+      1, /* ti  */
+    },
+  0, /* pre_modify  */
+  0, /* post_modify  */
+  0, /* register_offset  */
+  1, /* register_sextend  */
+  1, /* register_zextend  */
+  0 /* imm_offset  */
+};
+
   static const struct cpu_addrcost_table exynosm1_addrcost_table =
   {
       {
@@ -363,6 +379,16 @@ static const struct cpu_regmove_cost 
cortexa53_regmove_cost =
     2 /* FP2FP  */
   };
   +static const struct cpu_regmove_cost tsv110_regmove_cost =
+{
+  1, /* GP2GP  */
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+     their cost higher than memmov_cost.  */
+  2, /* GP2FP  */
+  3, /* FP2GP  */
+  2  /* FP2FP  */
+};
+
   static const struct cpu_regmove_cost exynosm1_regmove_cost =
   {
     1, /* GP2GP  */
@@ -469,6 +495,25 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
     1 /* cond_not_taken_branch_cost  */
   };
   +static const struct cpu_vector_cost tsv110_vector_cost =
+{
+  1, /* scalar_int_stmt_cost  */
+  1, /* scalar_fp_stmt_cost  */
+  5, /* scalar_load_cost  */
+  1, /* scalar_store_cost  */
+  2, /* vec_int_stmt_cost  */
+  2, /* vec_fp_stmt_cost  */
+  2, /* vec_permute_cost  */
+  3, /* vec_to_scalar_cost  */
+  2, /* scalar_to_vec_cost  */
+  5, /* vec_align_load_cost  */
+  5, /* vec_unalign_load_cost  */
+  1, /* vec_unalign_store_cost  */
+  1, /* vec_store_cost  */
+  1, /* cond_taken_branch_cost  */
+  1 /* cond_not_taken_branch_cost  */
+};
+
   static const struct cpu_vector_cost exynosm1_vector_cost =
   {
     1, /* scalar_int_stmt_cost  */
@@ -571,6 +616,15 @@ static const cpu_prefetch_tune generic_prefetch_tune =
     -1            /* default_opt_level  */
   };
   +static const cpu_prefetch_tune tsv110_prefetch_tune =
+{
+  0,            /* num_slots  */
+  64,            /* l1_cache_size  */
+  64,            /* l1_cache_line_size  */
+  512,            /* l2_cache_size  */
+  -1            /* default_opt_level  */
+};
+
   static const cpu_prefetch_tune exynosm1_prefetch_tune =
   {
     0,            /* num_slots  */
@@ -781,7 +835,31 @@ static const struct tune_params cortexa73_tunings =
     &generic_prefetch_tune
   };
   -
+static const struct tune_params tsv110_tunings =
+{
+  &tsv110_extra_costs,
+  &tsv110_addrcost_table,
+  &tsv110_regmove_cost,
+  &tsv110_vector_cost,
+  &generic_branch_cost,
+  &generic_approx_modes,
+  4, /* memmov_cost  */
+  4, /* issue_rate  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
+   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
+  16,    /* function_align.  */
+  4,    /* jump_align.  */
+  8,    /* loop_align.  */
+  2,    /* int_reassoc_width.  */
+  4,    /* fp_reassoc_width.  */
+  1,    /* vec_reassoc_width.  */
+  2,    /* min_div_recip_mul_sf.  */
+  2,    /* min_div_recip_mul_df.  */
+  0,    /* max_case_values.  */
+  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NONE),    /* tune_flags.  */
+  &tsv110_prefetch_tune
+};
     static const struct tune_params exynosm1_tunings =
   {
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 5c8f66c..d6fb91d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14662,7 +14662,7 @@ performance of the code.  Permissible values for this 
option are:
   @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
   @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
   @samp{exynos-m1}, @samp{falkor}, @samp{qdf24xx}, @samp{saphira},
-@samp{xgene1}, @samp{vulcan}, @samp{thunderx},
+@samp{xgene1}, @samp{vulcan}, @samp{thunderx}, @samp{tsv110}.
   @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
   @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
   @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},

Re: [PATCH v2] [aarch64] Add HiSilicon tsv110 CPU support

Reply via email to