https://gcc.gnu.org/g:a910c30c7c27cd0f6d2d2694544a09fb11d611b9

commit r15-1888-ga910c30c7c27cd0f6d2d2694544a09fb11d611b9
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Tue Apr 26 11:08:55 2022 -0700

    x86: Update branch hint for Redwood Cove.
    
    According to the Intel® 64 and IA-32 Architectures Optimization Reference
    Manual [1], branch hint behavior is updated for Redwood Cove.
    
    --------cut from [1]-------------------------
    Starting with the Redwood Cove microarchitecture, if the predictor has
    no stored information about a branch, the branch has the Intel® SSE2
    branch taken hint (i.e., instruction prefix 3EH), When the codec
    decodes the branch, it flips the branch’s prediction from not-taken to
    taken. It then flushes the pipeline in front of it and steers this
    pipeline to fetch the taken path of the branch.
    --------cut end -----------------------------
    
    Split tune branch_prediction_hints into branch_prediction_hints_taken
    and branch_prediction_hints_not_taken, and always generate a branch hint
    for conditional branches when the matching tune is enabled.  Both tunes
    are disabled by default.
    
    [1] https://www.intel.com/content/www/us/en/content-details/821612/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html
    
    gcc/
    
            * config/i386/i386.cc (ix86_print_operand): Always generate
            branch hint for conditional branches.
            * config/i386/i386.h (TARGET_BRANCH_PREDICTION_HINTS): Split
            into ..
            (TARGET_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and ..
            (TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this.
            * config/i386/x86-tune.def (X86_TUNE_BRANCH_PREDICTION_HINTS):
            Split into ..
            (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and ..
            (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this.

Diff:
---
 gcc/config/i386/i386.cc      | 29 +++++++++--------------------
 gcc/config/i386/i386.h       |  6 ++++--
 gcc/config/i386/x86-tune.def | 13 +++++++++++--
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f75250f79de4..17d23bbcbc27 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14057,7 +14057,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
 
            if (!optimize
                || optimize_function_for_size_p (cfun)
-               || !TARGET_BRANCH_PREDICTION_HINTS)
+               || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
+                   && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
              return;
 
            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
@@ -14066,25 +14067,13 @@ ix86_print_operand (FILE *file, rtx x, int code)
                int pred_val = profile_probability::from_reg_br_prob_note
                                 (XINT (x, 0)).to_reg_br_prob_base ();
 
-               if (pred_val < REG_BR_PROB_BASE * 45 / 100
-                   || pred_val > REG_BR_PROB_BASE * 55 / 100)
-                 {
-                   bool taken = pred_val > REG_BR_PROB_BASE / 2;
-                   bool cputaken
-                     = final_forward_branch_p (current_output_insn) == 0;
-
-                   /* Emit hints only in the case default branch prediction
-                      heuristics would fail.  */
-                   if (taken != cputaken)
-                     {
-                       /* We use 3e (DS) prefix for taken branches and
-                          2e (CS) prefix for not taken branches.  */
-                       if (taken)
-                         fputs ("ds ; ", file);
-                       else
-                         fputs ("cs ; ", file);
-                     }
-                 }
+               bool taken = pred_val > REG_BR_PROB_BASE / 2;
+               /* We use 3e (DS) prefix for taken branches and
+                  2e (CS) prefix for not taken branches.  */
+               if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
+                 fputs ("ds ; ", file);
+               else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
+                 fputs ("cs ; ", file);
              }
            return;
          }
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 0c5292e1d646..eabb3248ea00 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -309,8 +309,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_ZERO_EXTEND_WITH_AND \
        ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
 #define TARGET_UNROLL_STRLEN   ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
-#define TARGET_BRANCH_PREDICTION_HINTS \
-       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
+#define TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN \
+       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN]
+#define TARGET_BRANCH_PREDICTION_HINTS_TAKEN \
+       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN]
 #define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
 #define TARGET_USE_SAHF                ix86_tune_features[X86_TUNE_USE_SAHF]
 #define TARGET_MOVX            ix86_tune_features[X86_TUNE_MOVX]
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 343c32c291fa..3d29bffc49c3 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -683,15 +683,24 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
 DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
           m_K8)
 
+/* X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, starting with the Redwood Cove
+   microarchitecture, if the predictor has no stored information about a branch,
+   the branch has the Intel® SSE2 branch taken hint
+   (i.e., instruction prefix 3EH), When the codec decodes the branch, it flips
+   the branch’s prediction from not-taken to taken. It then flushes the pipeline
+   in front of it and steers this pipeline to fetch the taken path of the
+   branch.  */
+DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, "branch_prediction_hints_taken", m_NONE)
+
 /*****************************************************************************/
 /* This never worked well before.                                            */
 /*****************************************************************************/
 
-/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
+/* X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN: Branch hints were put in P4 based
    on simulation result. But after P4 was made, no performance benefit
    was observed with branch hints.  It also increases the code size.
    As a result, icc never generates branch hints.  */
-DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints", m_NONE)
+DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN, "branch_prediction_hints_not_taken", m_NONE)
 
 /* X86_TUNE_QIMODE_MATH: Enable use of 8bit arithmetic.  */
 DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", m_ALL)

Reply via email to