Hello tech@,
I discussed this earlier with kn@, and he suggested that I post it
here, so here it is...

If I understand correctly, one of the things preventing sparc64
from fully switching to clang/LLVM is that the generated binaries
(the kernel in particular) are much larger than the GCC-built ones.
So I tinkered with it to reduce the size of LLVM-built kernels;
here is what I did and the results so far:

1. Improve branches on 64-bit integers in LLVM.
   This mainly gets rid of accidental emission of useless ba/nop pairs.
   Those branches are common and so the savings end up pretty big.
   The patch is already accepted upstream at https://llvm.org/D130006,
   but it'll only be released with LLVM 16 so I'm giving a version
   adapted for OpenBSD's LLVM 13 below.

2. I noticed that LLVM's inliner is very aggressive (much more so than
   GCC's) and will happily inline large functions, which in some
   cases more than doubled the size of the affected functions.
   Currently I'm experimenting with adding
   COPTS="-fno-inline-functions -finline-hint-functions" to the make
   command when building the kernel with LLVM to turn off inlining
   except for functions explicitly declared inline, and so far
   the build seems to finish and boot normally.
   This should give some size reduction while keeping most of
   the benefits of performance optimizations
   (as opposed to going all the way with, say, -Os/-Oz).
   This could also be applicable to other binaries and/or arches, but
   on the other hand, I am not sure if globally limiting inlining in
   this way is a good idea...

With those two changes, I was able to bring the kernel binary to within
about 87 KiB (total size) of the GCC-built one:

text    data    bss     dec     hex
9534528 2294133 725888  12554549        bf9135  bsd.clang
9085212 2294068 727320  12106600        b8bb68  bsd.clang+patch
8089416 2295436 728216  11113068        a9926c  bsd.clang+patch+noinline
7862920 2429596 730968  11023484        a8347c  bsd.gcc

Any comments?

This is the adapted LLVM patch:

diff --git a/gnu/llvm/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/gnu/llvm/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 747f1b568a7..07ce6094ecd 100644
--- a/gnu/llvm/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/gnu/llvm/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1832,7 +1832,10 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case SPISD::CMPICC:          return "SPISD::CMPICC";
   case SPISD::CMPFCC:          return "SPISD::CMPFCC";
   case SPISD::BRICC:           return "SPISD::BRICC";
-  case SPISD::BRXCC:           return "SPISD::BRXCC";
+  case SPISD::BPICC:
+    return "SPISD::BPICC";
+  case SPISD::BPXCC:
+    return "SPISD::BPXCC";
   case SPISD::BRFCC:           return "SPISD::BRFCC";
   case SPISD::SELECT_ICC:      return "SPISD::SELECT_ICC";
   case SPISD::SELECT_XCC:      return "SPISD::SELECT_XCC";
@@ -2434,8 +2437,8 @@ static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
 }
 
 static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
-                          const SparcTargetLowering &TLI,
-                          bool hasHardQuad) {
+                          const SparcTargetLowering &TLI, bool hasHardQuad,
+                          bool isV9) {
   SDValue Chain = Op.getOperand(0);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   SDValue LHS = Op.getOperand(2);
@@ -2453,13 +2456,17 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
   if (LHS.getValueType().isInteger()) {
     CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS);
     if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
-    // 32-bit compares use the icc flags, 64-bit uses the xcc flags.
-    Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
+    if (isV9)
+      // 32-bit compares use the icc flags, 64-bit uses the xcc flags.
+      Opc = LHS.getValueType() == MVT::i32 ? SPISD::BPICC : SPISD::BPXCC;
+    else
+      // Non-v9 targets don't have xcc.
+      Opc = SPISD::BRICC;
   } else {
     if (!hasHardQuad && LHS.getValueType() == MVT::f128) {
       if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
       CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG);
-      Opc = SPISD::BRICC;
+      Opc = isV9 ? SPISD::BPICC : SPISD::BRICC;
     } else {
       CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
       if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
@@ -3035,8 +3042,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
                                                        hasHardQuad);
   case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG, *this,
                                                        hasHardQuad);
-  case ISD::BR_CC:              return LowerBR_CC(Op, DAG, *this,
-                                                  hasHardQuad);
+  case ISD::BR_CC:
+    return LowerBR_CC(Op, DAG, *this, hasHardQuad, isV9);
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG, *this,
                                                       hasHardQuad);
   case ISD::VASTART:            return LowerVASTART(Op, DAG, *this);
@@ -3115,6 +3122,8 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case SP::SELECT_CC_FP_ICC:
   case SP::SELECT_CC_DFP_ICC:
   case SP::SELECT_CC_QFP_ICC:
+    if (Subtarget->isV9())
+      return expandSelectCC(MI, BB, SP::BPICC);
     return expandSelectCC(MI, BB, SP::BCOND);
   case SP::SELECT_CC_Int_XCC:
   case SP::SELECT_CC_FP_XCC:
diff --git a/gnu/llvm/llvm/lib/Target/Sparc/SparcISelLowering.h b/gnu/llvm/llvm/lib/Target/Sparc/SparcISelLowering.h
index 5c9703823a6..6a3f9757980 100644
--- a/gnu/llvm/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/gnu/llvm/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -23,30 +23,36 @@ namespace llvm {
   namespace SPISD {
     enum NodeType : unsigned {
       FIRST_NUMBER = ISD::BUILTIN_OP_END,
-      CMPICC,      // Compare two GPR operands, set icc+xcc.
-      CMPFCC,      // Compare two FP operands, set fcc.
-      BRICC,       // Branch to dest on icc condition
-      BRXCC,       // Branch to dest on xcc condition (64-bit only).
-      BRFCC,       // Branch to dest on fcc condition
-      SELECT_ICC,  // Select between two values using the current ICC flags.
-      SELECT_XCC,  // Select between two values using the current XCC flags.
-      SELECT_FCC,  // Select between two values using the current FCC flags.
-
-      Hi, Lo,      // Hi/Lo operations, typically on a global address.
-
-      FTOI,        // FP to Int within a FP register.
-      ITOF,        // Int to FP within a FP register.
-      FTOX,        // FP to Int64 within a FP register.
-      XTOF,        // Int64 to FP within a FP register.
-
-      CALL,        // A call instruction.
-      RET_FLAG,    // Return with a flag operand.
+      CMPICC, // Compare two GPR operands, set icc+xcc.
+      CMPFCC, // Compare two FP operands, set fcc.
+      BRICC,  // Branch to dest on icc condition
+      BPICC,  // Branch to dest on icc condition, with prediction (64-bit only).
+      BPXCC,  // Branch to dest on xcc condition, with prediction (64-bit only).
+      BRFCC,  // Branch to dest on fcc condition
+      SELECT_ICC, // Select between two values using the current ICC flags.
+      SELECT_XCC, // Select between two values using the current XCC flags.
+      SELECT_FCC, // Select between two values using the current FCC flags.
+
+      Hi,
+      Lo, // Hi/Lo operations, typically on a global address.
+
+      FTOI, // FP to Int within a FP register.
+      ITOF, // Int to FP within a FP register.
+      FTOX, // FP to Int64 within a FP register.
+      XTOF, // Int64 to FP within a FP register.
+
+      CALL,            // A call instruction.
+      RET_FLAG,        // Return with a flag operand.
       GLOBAL_BASE_REG, // Global base reg for PIC.
-      FLUSHW,      // FLUSH register windows to stack.
+      FLUSHW,          // FLUSH register windows to stack.
 
-      TLS_ADD,     // For Thread Local Storage (TLS).
+      TAIL_CALL, // Tail call
+
+      TLS_ADD, // For Thread Local Storage (TLS).
       TLS_LD,
-      TLS_CALL
+      TLS_CALL,
+
+      LOAD_GDOP, // Load operation w/ gdop relocation.
     };
   }
 
diff --git a/gnu/llvm/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/gnu/llvm/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index dc60f563ae8..7a61603ef15 100644
--- a/gnu/llvm/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/gnu/llvm/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -302,13 +302,13 @@ def : Pat<(store (i64 0), ADDRri:$dst), (STXri ADDRri:$dst, (i64 G0))>;
 // The icc flags correspond to the 32-bit result, and the xcc are for the
 // full 64-bit result.
 //
-// We reuse CMPICC SDNodes for compares, but use new BRXCC branch nodes for
+// We reuse CMPICC SDNodes for compares, but use new BPXCC branch nodes for
 // 64-bit compares. See LowerBR_CC.
 
 let Predicates = [Is64Bit] in {
 
 let Uses = [ICC], cc = 0b10 in
-  defm BPX : IPredBranch<"%xcc", [(SPbrxcc bb:$imm19, imm:$cond)]>;
+  defm BPX : IPredBranch<"%xcc", [(SPbpxcc bb:$imm19, imm:$cond)]>;
 
 // Conditional moves on %xcc.
 let Uses = [ICC], Constraints = "$f = $rd" in {
diff --git a/gnu/llvm/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/gnu/llvm/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index dc3a41c6309..be688ea143d 100644
--- a/gnu/llvm/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/gnu/llvm/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -140,10 +140,25 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
   llvm_unreachable("Invalid cond code");
 }
 
-static bool isUncondBranchOpcode(int Opc) { return Opc == SP::BA; }
+static bool isUncondBranchOpcode(int Opc) {
+  return Opc == SP::BA || Opc == SP::BPA;
+}
+
+static bool isI32CondBranchOpcode(int Opc) {
+  return Opc == SP::BCOND || Opc == SP::BPICC || Opc == SP::BPICCA ||
+         Opc == SP::BPICCNT || Opc == SP::BPICCANT;
+}
+
+static bool isI64CondBranchOpcode(int Opc) {
+  return Opc == SP::BPXCC || Opc == SP::BPXCCA || Opc == SP::BPXCCNT ||
+         Opc == SP::BPXCCANT;
+}
+
+static bool isFCondBranchOpcode(int Opc) { return Opc == SP::FBCOND; }
 
 static bool isCondBranchOpcode(int Opc) {
-  return Opc == SP::FBCOND || Opc == SP::BCOND;
+  return isI32CondBranchOpcode(Opc) || isI64CondBranchOpcode(Opc) ||
+         isFCondBranchOpcode(Opc);
 }
 
 static bool isIndirectBranchOpcode(int Opc) {
@@ -152,7 +167,14 @@ static bool isIndirectBranchOpcode(int Opc) {
 
 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                             SmallVectorImpl<MachineOperand> &Cond) {
-  Cond.push_back(MachineOperand::CreateImm(LastInst->getOperand(1).getImm()));
+  unsigned Opc = LastInst->getOpcode();
+  int64_t CC = LastInst->getOperand(1).getImm();
+
+  // Push the branch opcode into Cond too so later in insertBranch
+  // it can use the information to emit the correct SPARC branch opcode.
+  Cond.push_back(MachineOperand::CreateImm(Opc));
+  Cond.push_back(MachineOperand::CreateImm(CC));
+
   Target = LastInst->getOperand(0).getMBB();
 }
 
@@ -246,27 +268,29 @@ unsigned SparcInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                       const DebugLoc &DL,
                                       int *BytesAdded) const {
   assert(TBB && "insertBranch must not be told to insert a fallthrough");
-  assert((Cond.size() == 1 || Cond.size() == 0) &&
-         "Sparc branch conditions should have one component!");
+  assert((Cond.size() <= 2) &&
+         "Sparc branch conditions should have at most two components!");
   assert(!BytesAdded && "code size not handled");
 
   if (Cond.empty()) {
     assert(!FBB && "Unconditional branch with multiple successors!");
-    BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(Subtarget.isV9() ? SP::BPA : SP::BA)).addMBB(TBB);
     return 1;
   }
 
   // Conditional branch
-  unsigned CC = Cond[0].getImm();
+  unsigned Opc = Cond[0].getImm();
+  unsigned CC = Cond[1].getImm();
 
-  if (IsIntegerCC(CC))
-    BuildMI(&MBB, DL, get(SP::BCOND)).addMBB(TBB).addImm(CC);
-  else
+  if (IsIntegerCC(CC)) {
+    BuildMI(&MBB, DL, get(Opc)).addMBB(TBB).addImm(CC);
+  } else {
     BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC);
+  }
   if (!FBB)
     return 1;
 
-  BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB);
+  BuildMI(&MBB, DL, get(Subtarget.isV9() ? SP::BPA : SP::BA)).addMBB(FBB);
   return 2;
 }
 
@@ -282,9 +306,8 @@ unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB,
     if (I->isDebugInstr())
       continue;
 
-    if (I->getOpcode() != SP::BA
-        && I->getOpcode() != SP::BCOND
-        && I->getOpcode() != SP::FBCOND)
+    if (!isCondBranchOpcode(I->getOpcode()) &&
+        !isUncondBranchOpcode(I->getOpcode()))
       break; // Not a branch
 
     I->eraseFromParent();
@@ -296,9 +319,9 @@ unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB,
 
 bool SparcInstrInfo::reverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const {
-  assert(Cond.size() == 1);
-  SPCC::CondCodes CC = static_cast<SPCC::CondCodes>(Cond[0].getImm());
-  Cond[0].setImm(GetOppositeBranchCondition(CC));
+  assert(Cond.size() <= 2);
+  SPCC::CondCodes CC = static_cast<SPCC::CondCodes>(Cond[1].getImm());
+  Cond[1].setImm(GetOppositeBranchCondition(CC));
   return false;
 }
 
diff --git a/gnu/llvm/llvm/lib/Target/Sparc/SparcInstrInfo.td b/gnu/llvm/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 9662327e44f..de49bd1c96b 100644
--- a/gnu/llvm/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/gnu/llvm/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -217,7 +217,8 @@ SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
 def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>;
 def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
 def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
-def SPbrxcc : SDNode<"SPISD::BRXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbpicc : SDNode<"SPISD::BPICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbpxcc : SDNode<"SPISD::BPXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
 def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
 
 def SPhi    : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
@@ -808,19 +809,27 @@ defm SAVE    : F3_12np<"save"   , 0b111100>;
 defm RESTORE : F3_12np<"restore", 0b111101>;
 
 // Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+// Section A.7 - Branch on Integer Condition Codes with Prediction (SPARC v9)
 
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
 // unconditional branch class.
 class BranchAlways<dag ins, string asmstr, list<dag> pattern>
-  : F2_2<0b010, 0, (outs), ins, asmstr, pattern> {
-  let isBranch     = 1;
-  let isTerminator = 1;
-  let hasDelaySlot = 1;
-  let isBarrier    = 1;
+  : F2_2<0b010, 0, (outs), ins, asmstr, pattern>;
+
+// Same as BranchAlways but uses the new v9 encoding
+class BranchPredictAlways<dag ins, string asmstr, list<dag> pattern>
+  : F2_3<0b001, 0, 1, (outs), ins, asmstr, pattern>;
 }
 
-let cond = 8 in
-  def BA : BranchAlways<(ins brtarget:$imm22), "ba $imm22", [(br bb:$imm22)]>;
+let cond = 8 in {
+  // If we're compiling for v9, prefer BPA rather than BA
+  // TODO: Disallow BA emission when FeatureV8Deprecated isn't enabled
+  let Predicates = [HasV9], cc = 0b00 in
+    def BPA : BranchPredictAlways<(ins bprtarget:$imm19),
+      "ba %icc, $imm19", [(br bb:$imm19)]>;
 
+  def BA : BranchAlways<(ins brtarget:$imm22), "ba $imm22", [(br bb:$imm22)]>;
+}
 
 let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in {
 
@@ -876,7 +885,7 @@ let Uses = [ICC] in {
                          "b$cond,a $imm22", []>;
 
   let Predicates = [HasV9], cc = 0b00 in
-    defm BPI : IPredBranch<"%icc", []>;
+    defm BPI : IPredBranch<"%icc", [(SPbpicc bb:$imm19, imm:$cond)]>;
 }
 
 // Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121

Reply via email to