[llvm] [clang] [AArch64] Stack probing for function prologues (PR #66524)

2023-11-01 Thread Oskar Wirga via cfe-commits


@@ -688,6 +689,68 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
   emitCalleeSavedRestores(MBB, MBBI, true);
 }
 
+void AArch64FrameLowering::allocateSVEStackSpace(
+MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+StackOffset AllocSize, StackOffset InitialOffset, bool EmitCFI) const {
+  DebugLoc DL;
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64FunctionInfo &MFI = *MF.getInfo();
+  const AArch64Subtarget &Subtarget = MF.getSubtarget();
+  const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
+  const AArch64TargetLowering &TLI = *Subtarget.getTargetLowering();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+
+  // If not probing the stack or the (uknown) allocation size is less than the
+  // probe size decrement the stack pointer right away. This avoids having to
+  // emit a probing loop when allocating space for up to 16 SVE registers when
+  // using 4k probes.
+
+  // The bit-length of SVE registers is architecturally limited.
+  const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
+  int64_t ProbeSize = MFI.getStackProbeSize();
+  if (!TLI.hasInlineStackProbe(MF) ||
+  AllocSize.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE +
+  AllocSize.getFixed() <=
+  ProbeSize) {
+emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -AllocSize, &TII,
+MachineInstr::FrameSetup, false, false, nullptr, EmitCFI,
+InitialOffset);
+if (TLI.hasInlineStackProbe(MF)) {
+  // Issue a probe at the top of the stack to prepare for subsequent
+  // allocations.
+  // STR XZR, [TargetReg]

oskarwirga wrote:

```suggestion
  // STR XZR, [SP]
```

https://github.com/llvm/llvm-project/pull/66524
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for function prologues (PR #66524)

2023-11-21 Thread Momchil Velikov via cfe-commits


@@ -26262,3 +26262,37 @@ bool 
AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
   }
   return true;
 }
+
+bool AArch64TargetLowering::hasInlineStackProbe(
+const MachineFunction &MF) const {
+  // If the function specifically requests inline stack probes, emit them.
+  if (MF.getFunction().hasFnAttribute("probe-stack")) {
+if (MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+"inline-asm")
+  return true;
+else
+  llvm_unreachable("Unsupported stack probing method");
+  }
+
+  return false;
+}
+
+unsigned
+AArch64TargetLowering::getStackProbeSize(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = Subtarget->getFrameLowering();
+  unsigned StackAlign = TFI->getStackAlignment();
+  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
+ "Unexpected stack alignment");
+  // The default stack probe size is 4096 if the function has no
+  // stack-probe-size attribute. This is a safe default because it is the
+  // smallest possible guard page size.
+  unsigned StackProbeSize = 4096;
+  const Function &Fn = MF.getFunction();
+  if (Fn.hasFnAttribute("stack-probe-size"))

momchil-velikov wrote:

Some validation of the value would be useful.

https://github.com/llvm/llvm-project/pull/66524
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for function prologues (PR #66524)

2023-11-21 Thread Momchil Velikov via cfe-commits


@@ -1827,12 +1908,36 @@ void AArch64FrameLowering::emitPrologue(MachineFunction 
&MF,
   // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
   // the correct value here, as NumBytes also includes padding bytes,
   // which shouldn't be counted here.
-  emitFrameOffset(
-  MBB, MBBI, DL, scratchSPReg, AArch64::SP,
-  StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
-  false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+  StackOffset CFAOffset =
   SVEStackSize +
-  StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+  StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+  if (NeedsStackProbe && !NeedsRealignment) {
+// If we don't need to re-align the stack, we can use a more efficient
+// sequence for stack probing.
+Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);

momchil-velikov wrote:

Why do we need scratch reg here?

https://github.com/llvm/llvm-project/pull/66524
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for function prologues (PR #66524)

2023-11-21 Thread Momchil Velikov via cfe-commits


@@ -4052,3 +4193,192 @@ void AArch64FrameLowering::orderFrameObjects(
 dbgs() << "\n";
   });
 }
+
+/// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
+/// least every ProbeSize bytes. Returns an iterator of the first instruction
+/// after the loop. The difference between SP and TargetReg must be an exact
+/// multiple of ProbeSize.
+MachineBasicBlock::iterator
+AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
+MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
+Register TargetReg) const {
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64InstrInfo *TII =
+  MF.getSubtarget().getInstrInfo();
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, LoopMBB);
+  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, ExitMBB);
+
+  // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
+  // in SUB).
+  emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
+  StackOffset::getFixed(-ProbeSize), TII,
+  MachineInstr::FrameSetup);
+  // STR XZR, [SP]
+  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
+  .addReg(AArch64::XZR)
+  .addReg(AArch64::SP)
+  .addImm(0)
+  .setMIFlags(MachineInstr::FrameSetup);
+  // CMP SP, TargetReg
+  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+  AArch64::XZR)
+  .addReg(AArch64::SP)
+  .addReg(TargetReg)
+  .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+  .setMIFlags(MachineInstr::FrameSetup);
+  // B.CC Loop
+  BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
+  .addImm(AArch64CC::NE)
+  .addMBB(LoopMBB)
+  .setMIFlags(MachineInstr::FrameSetup);
+
+  LoopMBB->addSuccessor(ExitMBB);
+  LoopMBB->addSuccessor(LoopMBB);
+  // Synthesize the exit MBB.
+  ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
+  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+  MBB.addSuccessor(LoopMBB);
+  // Update liveins.
+  recomputeLiveIns(*LoopMBB);
+  recomputeLiveIns(*ExitMBB);
+
+  return ExitMBB->begin();
+}
+
+MachineBasicBlock::iterator AArch64FrameLowering::inlineStackProbeFixed(
+MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
+StackOffset CFAOffset) const {
+  MachineBasicBlock *MBB = MBBI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const AArch64TargetLowering *TLI =
+  MF.getSubtarget().getTargetLowering();
+  const AArch64InstrInfo *TII =
+  MF.getSubtarget().getInstrInfo();
+  AArch64FunctionInfo *AFI = MF.getInfo();
+  bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+  bool HasFP = hasFP(MF);
+
+  DebugLoc DL;
+  int64_t ProbeSize = TLI->getStackProbeSize(MF);
+  int64_t NumBlocks = FrameSize / ProbeSize;
+  int64_t ResidualSize = FrameSize % ProbeSize;
+
+  LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
+<< NumBlocks << " blocks of " << ProbeSize
+<< " bytes, plus " << ResidualSize << " bytes\n");
+
+  // Decrement SP by NumBlock * ProbeSize bytes, with either unrolled or
+  // ordinary loop.
+  if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
+for (int i = 0; i < NumBlocks; ++i) {
+  // SUB SP, SP, #FrameSize (or equivalent if FrameSize is not

momchil-velikov wrote:

Wrong comment (code is OK): ProbeSize, not FrameSize.

https://github.com/llvm/llvm-project/pull/66524
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for function prologues (PR #66524)

2023-11-21 Thread Momchil Velikov via cfe-commits


@@ -672,6 +673,74 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
   emitCalleeSavedRestores(MBB, MBBI, true);
 }
 
+void AArch64FrameLowering::allocateSVEStackSpace(
+MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+StackOffset AllocSize, StackOffset InitialOffset, bool EmitCFI) const {
+  DebugLoc DL;
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64Subtarget &Subtarget = MF.getSubtarget();
+  const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
+  const AArch64TargetLowering &TLI = *Subtarget.getTargetLowering();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+
+  // If not probing the stack or the (uknown) allocation size is less than the
+  // probe size decrement the stack pointer right away. This avoids having to
+  // emit a probing loop when allocating space for up to 16 SVE registers when
+  // using 4k probes.
+
+  // The bit-length of SVE registers is architecturally limited.
+  const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
+  int64_t ProbeSize = TLI.getStackProbeSize(MF);
+  if (!TLI.hasInlineStackProbe(MF) ||
+  AllocSize.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE +
+  AllocSize.getFixed() <=
+  ProbeSize) {
+emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -AllocSize, &TII,
+MachineInstr::FrameSetup, false, false, nullptr, EmitCFI,
+InitialOffset);
+if (TLI.hasInlineStackProbe(MF)) {
+  // Issue a probe at the top of the stack to prepare for subsequent
+  // allocations.
+  // STR XZR, [TargetReg]
+  BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
+  .addReg(AArch64::XZR)
+  .addReg(AArch64::SP)
+  .addImm(0)
+  .setMIFlags(MachineInstr::FrameSetup);
+}
+return;
+  }
+
+  // If we can't be sure the allocation size if less than the probe size, we
+  // have to emit a stack probing loop.
+  Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+  assert(ScratchReg != AArch64::NoRegister);
+  // Get the new top of the stack into a scratch register.
+  emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
+  MachineInstr::FrameSetup, false, false, nullptr, EmitCFI,
+  InitialOffset);
+  // Arrange to emit a probing loop by decrementing SP until it reaches that
+  // new top of the stack.
+  BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR), AArch64::SP)
+  .addReg(ScratchReg);
+  // Set SP to its new value.
+  // MOV SP, Xs
+  BuildMI(MBB, MBBI, DL, TII.get(AArch64::ADDXri), AArch64::SP)
+  .addReg(ScratchReg)
+  .addImm(0)
+  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+  .setMIFlags(MachineInstr::FrameSetup);
+  if (EmitCFI) {

momchil-velikov wrote:

Taken care of in invocations of `allocateSVEStackSpace`

https://github.com/llvm/llvm-project/pull/66524
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for function prologues (PR #66524)

2023-11-21 Thread Momchil Velikov via cfe-commits


@@ -9460,6 +9461,94 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
   return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
 }
 
+MachineBasicBlock::iterator
+AArch64InstrInfo::insertStackProbingLoop(MachineBasicBlock::iterator MBBI,
+ Register ScratchReg,
+ Register TargetReg) const {
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64InstrInfo *TII =
+  MF.getSubtarget().getInstrInfo();
+  int64_t ProbeSize = MF.getInfo()->getStackProbeSize();
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+  MachineBasicBlock *LoopTestMBB =
+  MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, LoopTestMBB);
+  MachineBasicBlock *LoopBodyMBB =
+  MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, LoopBodyMBB);
+  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, ExitMBB);
+
+  // LoopTest:
+  //   SUB ScratchReg, ScratchReg, #ProbeSize
+  emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, ScratchReg, ScratchReg,
+  StackOffset::getFixed(-ProbeSize), TII,
+  MachineInstr::FrameSetup);
+
+  //   CMP ScratchReg, TargetReg
+  AArch64CC::CondCode Cond = AArch64CC::LE;
+  Register Op1 = ScratchReg;
+  Register Op2 = TargetReg;
+  if (Op2 == AArch64::SP) {
+assert(Op1 != AArch64::SP && "At most one of the registers can be SP");
+// CMP TargetReg, ScratchReg
+std::swap(Op1, Op2);
+Cond = AArch64CC::GT;
+  }
+  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+  AArch64::XZR)
+  .addReg(Op1)
+  .addReg(Op2)
+  .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+  .setMIFlags(MachineInstr::FrameSetup);
+
+  //   B. LoopExit
+  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
+  .addImm(Cond)
+  .addMBB(ExitMBB)
+  .setMIFlags(MachineInstr::FrameSetup);
+
+  //   STR XZR, [ScratchReg]
+  BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
+  .addReg(AArch64::XZR)
+  .addReg(ScratchReg)
+  .addImm(0)
+  .setMIFlags(MachineInstr::FrameSetup);
+
+  //   B loop
+  BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
+  .addMBB(LoopTestMBB)
+  .setMIFlags(MachineInstr::FrameSetup);
+
+  // LoopExit:
+  //   STR XZR, [TargetReg]
+  BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(AArch64::STRXui))
+  .addReg(AArch64::XZR)
+  .addReg(TargetReg)
+  .addImm(0)
+  .setMIFlags(MachineInstr::FrameSetup);

momchil-velikov wrote:

> ```
> sub sp, sp, #0x1, lsl #0xc
> cmp sp, x1
> b.le 0x557388
> str xzr, [x1]  {0x0}
> ```
> 
> We are probing the _old_ stack head! `x1` contains `0x7fee80` but `sp` is 
> at `0x7fde80`! This means that the selection of the `x1` register instead 
> of `sp` is incorrect.

I can't quite see how it is possible to generate this code. This is part of the 
sequence for allocating a compile-time-unknown amount of stack space, which is 
done by `AArch64InstrInfo::insertStackProbingLoop`. In this function 
`TargetReg` is the new top of the stack and right now [1] `ScratchReg` is 
always `AArch64::SP`.


Thus first we have
```
  // LoopTest:
  //   SUB ScratchReg, ScratchReg, #ProbeSize
  emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, ScratchReg, ScratchReg,
  StackOffset::getFixed(-ProbeSize), TII,
  MachineInstr::FrameSetup);
```

This is the code that emits the `sub sp, sp, #0x1, lsl #0xc`. Note that it uses 
`ScratchReg`.

Then we emit the compare
```
  //   CMP ScratchReg, TargetReg

  AArch64CC::CondCode Cond = AArch64CC::LE;
  Register Op1 = ScratchReg;
  Register Op2 = TargetReg;
  if (Op2 == AArch64::SP) { // condition is false here
  // ...
  }

  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
  AArch64::XZR)
  .addReg(Op1)
  .addReg(Op2)
  .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
  .setMIFlags(MachineInstr::FrameSetup);
```

That is the `cmp sp, x1`. So, `Op2` is `TargetReg` and `TargetReg` is `x1`.

Then we emit the loop exit branch:
```
  //   B. LoopExit
  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
  .addImm(Cond)
  .addMBB(ExitMBB)
  .setMIFlags(MachineInstr::FrameSetup);
```

This is the `b.le 0x557388` above.

and then, still inside the probing loop, we emit a stack probe to `ScratchReg`, 
i.e. to `SP`.

```
  //   STR XZR, [ScratchReg]
  BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
  .addReg(AArch64::XZR)
  .addReg(ScratchReg)
  .addImm(0)
  .setMIFlags(MachineInstr::FrameSetup);
```

However, i

[llvm] [clang] [AArch64] Stack probing for function prologues (PR #66524)

2023-11-21 Thread Momchil Velikov via cfe-commits


@@ -26262,3 +26262,37 @@ bool 
AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
   }
   return true;
 }
+
+bool AArch64TargetLowering::hasInlineStackProbe(
+const MachineFunction &MF) const {
+  // If the function specifically requests inline stack probes, emit them.
+  if (MF.getFunction().hasFnAttribute("probe-stack")) {
+if (MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+"inline-asm")
+  return true;
+else
+  llvm_unreachable("Unsupported stack probing method");
+  }
+
+  return false;
+}
+
+unsigned
+AArch64TargetLowering::getStackProbeSize(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = Subtarget->getFrameLowering();
+  unsigned StackAlign = TFI->getStackAlignment();
+  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
+ "Unexpected stack alignment");
+  // The default stack probe size is 4096 if the function has no
+  // stack-probe-size attribute. This is a safe default because it is the
+  // smallest possible guard page size.
+  unsigned StackProbeSize = 4096;
+  const Function &Fn = MF.getFunction();
+  if (Fn.hasFnAttribute("stack-probe-size"))

momchil-velikov wrote:

The rounding to the stack alignment size is enough. Rounding down is the safer 
choice, zero is handled, and there's no requirement other than being a multiple 
of the stack alignment. Some choices might not be appropriate for certain 
platforms, e.g. a 5k or 8k probe size with a 4k guard page size, but that's not 
something that can be validated here. Values greater than 64k might also be 
legitimate, e.g. a specific platform allocates 2 guard pages (128k) at the top 
of the stack to limit probing overhead.

https://github.com/llvm/llvm-project/pull/66524
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for function prologues (PR #66524)

2023-11-21 Thread Momchil Velikov via cfe-commits


@@ -9460,6 +9461,94 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
   return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
 }
 
+MachineBasicBlock::iterator
+AArch64InstrInfo::insertStackProbingLoop(MachineBasicBlock::iterator MBBI,
+ Register ScratchReg,
+ Register TargetReg) const {
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64InstrInfo *TII =
+  MF.getSubtarget().getInstrInfo();
+  int64_t ProbeSize = MF.getInfo()->getStackProbeSize();
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+  MachineBasicBlock *LoopTestMBB =
+  MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, LoopTestMBB);
+  MachineBasicBlock *LoopBodyMBB =
+  MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, LoopBodyMBB);
+  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, ExitMBB);
+
+  // LoopTest:
+  //   SUB ScratchReg, ScratchReg, #ProbeSize
+  emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, ScratchReg, ScratchReg,
+  StackOffset::getFixed(-ProbeSize), TII,
+  MachineInstr::FrameSetup);
+
+  //   CMP ScratchReg, TargetReg
+  AArch64CC::CondCode Cond = AArch64CC::LE;
+  Register Op1 = ScratchReg;
+  Register Op2 = TargetReg;
+  if (Op2 == AArch64::SP) {
+assert(Op1 != AArch64::SP && "At most one of the registers can be SP");
+// CMP TargetReg, ScratchReg
+std::swap(Op1, Op2);
+Cond = AArch64CC::GT;
+  }
+  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+  AArch64::XZR)
+  .addReg(Op1)
+  .addReg(Op2)
+  .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+  .setMIFlags(MachineInstr::FrameSetup);
+
+  //   B. LoopExit
+  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
+  .addImm(Cond)
+  .addMBB(ExitMBB)
+  .setMIFlags(MachineInstr::FrameSetup);
+
+  //   STR XZR, [ScratchReg]
+  BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
+  .addReg(AArch64::XZR)
+  .addReg(ScratchReg)
+  .addImm(0)
+  .setMIFlags(MachineInstr::FrameSetup);
+
+  //   B loop
+  BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
+  .addMBB(LoopTestMBB)
+  .setMIFlags(MachineInstr::FrameSetup);
+
+  // LoopExit:
+  //   STR XZR, [TargetReg]
+  BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(AArch64::STRXui))
+  .addReg(AArch64::XZR)
+  .addReg(TargetReg)
+  .addImm(0)
+  .setMIFlags(MachineInstr::FrameSetup);

momchil-velikov wrote:

I have now fixed this issue.

https://github.com/llvm/llvm-project/pull/66524
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for function prologues (PR #66524)

2023-11-21 Thread Momchil Velikov via cfe-commits


@@ -1076,6 +1076,16 @@ void CodeGenModule::Release() {
 "sign-return-address-with-bkey", 1);
   }
 
+  if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
+auto *InlineAsm = llvm::MDString::get(TheModule.getContext(), 
"inline-asm");
+if (CodeGenOpts.StackClashProtector)
+  getModule().addModuleFlag(llvm::Module::Override, "probe-stack",
+InlineAsm);

momchil-velikov wrote:

We would like to use a module flag so that stack clash protection is effective 
for functions created by LLVM (e.g. `asan.module_ctor`).
It is not AArch64-specific in principle, but other backends which implement SCP 
still rely on function attributes. When/if other backends adopt this approach, 
the condition can be removed.
@serge-sans-paille 

https://github.com/llvm/llvm-project/pull/66524
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits