4vtomat updated this revision to Diff 545413.
4vtomat marked 2 inline comments as done.
4vtomat added a comment.

Resolved Aaron's comments.
Also group some code and refactoring some code to make it much clear.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154576/new/

https://reviews.llvm.org/D154576

Files:
  clang/include/clang/Basic/Attr.td
  clang/include/clang/Basic/AttrDocs.td
  clang/include/clang/Basic/Specifiers.h
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/Type.cpp
  clang/lib/AST/TypePrinter.cpp
  clang/lib/Basic/Targets/RISCV.cpp
  clang/lib/Basic/Targets/RISCV.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaType.cpp
  clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
  clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp
  llvm/include/llvm/AsmParser/LLToken.h
  llvm/include/llvm/IR/CallingConv.h
  llvm/lib/AsmParser/LLLexer.cpp
  llvm/lib/AsmParser/LLParser.cpp
  llvm/lib/IR/AsmWriter.cpp
  llvm/lib/Target/RISCV/RISCVCallingConv.td
  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
  llvm/lib/Target/RISCV/RISCVISelLowering.cpp
  llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
  llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll

Index: llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
@@ -0,0 +1,221 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m -mattr=+v -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
+
+define <vscale x 1 x i32> @test_vector_std(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_std:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 1 x i32> %va
+}
+
+define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_callee:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 4
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 4
+; SPILL-O2-NEXT:    sub a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 14
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 13
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 12
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 11
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 10
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 3
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 3
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 3
+; SPILL-O2-NEXT:    sub a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 6
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 2
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 2
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 1
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vs1r.v v31, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 4
+; SPILL-O2-NEXT:    sub a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 14
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 13
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 12
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 11
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 10
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 3
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 3
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 3
+; SPILL-O2-NEXT:    sub a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    li a1, 6
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 2
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 2
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a1, a0, 1
+; SPILL-O2-NEXT:    add a0, a1, a0
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
+; SPILL-O2-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 4
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 1 x i32> %va
+}
Index: llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -66,17 +66,26 @@
     return CSR_Interrupt_SaveList;
   }
 
+  bool HasVectorCSR =
+      MF->getFunction().getCallingConv() == CallingConv::RISCV_VectorCall;
+
   switch (Subtarget.getTargetABI()) {
   default:
     llvm_unreachable("Unrecognized ABI");
   case RISCVABI::ABI_ILP32:
   case RISCVABI::ABI_LP64:
+    if (HasVectorCSR)
+      return CSR_ILP32_LP64_V_SaveList;
     return CSR_ILP32_LP64_SaveList;
   case RISCVABI::ABI_ILP32F:
   case RISCVABI::ABI_LP64F:
+    if (HasVectorCSR)
+      return CSR_ILP32F_LP64F_V_SaveList;
     return CSR_ILP32F_LP64F_SaveList;
   case RISCVABI::ABI_ILP32D:
   case RISCVABI::ABI_LP64D:
+    if (HasVectorCSR)
+      return CSR_ILP32D_LP64D_V_SaveList;
     return CSR_ILP32D_LP64D_SaveList;
   }
 }
@@ -636,12 +645,18 @@
     llvm_unreachable("Unrecognized ABI");
   case RISCVABI::ABI_ILP32:
   case RISCVABI::ABI_LP64:
+    if (CC == CallingConv::RISCV_VectorCall)
+      return CSR_ILP32_LP64_V_RegMask;
     return CSR_ILP32_LP64_RegMask;
   case RISCVABI::ABI_ILP32F:
   case RISCVABI::ABI_LP64F:
+    if (CC == CallingConv::RISCV_VectorCall)
+      return CSR_ILP32F_LP64F_V_RegMask;
     return CSR_ILP32F_LP64F_RegMask;
   case RISCVABI::ABI_ILP32D:
   case RISCVABI::ABI_LP64D:
+    if (CC == CallingConv::RISCV_VectorCall)
+      return CSR_ILP32D_LP64D_V_RegMask;
     return CSR_ILP32D_LP64D_RegMask;
   }
 }
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14899,6 +14899,7 @@
     report_fatal_error("Unsupported calling convention");
   case CallingConv::C:
   case CallingConv::Fast:
+  case CallingConv::RISCV_VectorCall:
     break;
   case CallingConv::GHC:
     if (!Subtarget.hasStdExtF() || !Subtarget.hasStdExtD())
Index: llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -379,6 +379,21 @@
   return NonLibcallCSI;
 }
 
+static SmallVector<CalleeSavedInfo, 8>
+getRVVCalleeSavedInfo(const MachineFunction &MF,
+                      const std::vector<CalleeSavedInfo> &CSI) {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  SmallVector<CalleeSavedInfo, 8> RVVCSI;
+
+  for (auto &CS : CSI) {
+    int FI = CS.getFrameIdx();
+    if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector)
+      RVVCSI.push_back(CS);
+  }
+
+  return RVVCSI;
+}
+
 void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
@@ -578,6 +593,10 @@
   // directives.
   for (const auto &Entry : CSI) {
     int FrameIdx = Entry.getFrameIdx();
+    if (FrameIdx >=0 &&
+        MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
+      continue;
+
     int64_t Offset;
     // Offsets for objects with fixed locations (IE: those saved by libcall) are
     // simply calculated from the frame index.
@@ -721,7 +740,7 @@
 
   const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo());
 
-  // Skip to before the restores of callee-saved registers
+  // Skip to before the restores of scalar callee-saved registers
   // FIXME: assumes exactly one instruction is used to restore each
   // callee-saved register.
   auto LastFrameDestroy = MBBI;
@@ -1016,15 +1035,24 @@
   MachineFrameInfo &MFI = MF.getFrameInfo();
   // Create a buffer of RVV objects to allocate.
   SmallVector<int, 8> ObjectsToAllocate;
-  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
-    unsigned StackID = MFI.getStackID(I);
-    if (StackID != TargetStackID::ScalableVector)
-      continue;
-    if (MFI.isDeadObjectIndex(I))
-      continue;
+  auto pushRVVObjects = [&](int FIBegin, int FIEnd) {
+    for (int I = FIBegin, E = FIEnd; I != E; ++I) {
+      unsigned StackID = MFI.getStackID(I);
+      if (StackID != TargetStackID::ScalableVector)
+        continue;
+      if (MFI.isDeadObjectIndex(I))
+        continue;
 
-    ObjectsToAllocate.push_back(I);
-  }
+      ObjectsToAllocate.push_back(I);
+    }
+  };
+  // First push RVV Callee Saved object, then push RVV stack object
+  std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
+  const auto &RVVCSI = getRVVCalleeSavedInfo(MF, CSI);
+  if (RVVCSI.size())
+    pushRVVObjects(RVVCSI[0].getFrameIdx(),
+                   RVVCSI[RVVCSI.size() - 1].getFrameIdx() + 1);
+  pushRVVObjects(0, MFI.getObjectIndexEnd() - RVVCSI.size());
 
   // The minimum alignment is 16 bytes.
   Align RVVStackAlign(16);
@@ -1356,13 +1384,19 @@
 
   // Manually spill values not spilled by libcall & Push/Pop.
   const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
-  for (auto &CS : UnmanagedCSI) {
-    // Insert the spill to the stack frame.
-    Register Reg = CS.getReg();
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
-                            RC, TRI, Register());
-  }
+  const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI);
+
+  auto storeRegToStackSlot = [&](decltype(UnmanagedCSI) CSInfo) {
+    for (auto &CS: CSInfo) {
+      // Insert the spill to the stack frame.
+      Register Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
+                              RC, TRI, Register());
+    }
+  };
+  storeRegToStackSlot(UnmanagedCSI);
+  storeRegToStackSlot(RVVCSI);
 
   return true;
 }
@@ -1385,14 +1419,46 @@
   // first in the epilogue. It increases the opportunity to avoid the
   // load-to-use data hazard between loading RA and return by RA.
   // loadRegFromStackSlot can insert multiple instructions.
+  //
+  //
+  // We first change the restore order for scalar and vector
+  // callee-saved registers as the layout shown below:
+  //
+  // Epilog restore order (original):
+  //     ----------------------------
+  //      RVV objects
+  //     ----------------------------
+  //      Callee-saved regs(scalar)
+  //      Callee-saved regs(vector)
+  //     ----------------------------
+  //
+  // Epilog restore order (after):
+  //     ----------------------------
+  //      RVV objects
+  //     ----------------------------
+  //      Callee-saved regs(vector)
+  //      Callee-saved regs(scalar)
+  //     ----------------------------
+  //
+  // So that it is able to put all vector registers which need
+  // to be restored together. The return address will be restored
+  // first in the scalar regs. It increases the opportunity to avoid the
+  // load-to-use data hazard between loading RA and return by RA.
+  // loadRegFromStackSlot can insert multiple instructions.
   const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
-  for (auto &CS : UnmanagedCSI) {
-    Register Reg = CS.getReg();
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
-                             Register());
-    assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
-  }
+  const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, CSI);
+
+  auto loadRegFromStackSlot = [&](decltype(UnmanagedCSI) CSInfo) {
+    for (auto &CS: CSInfo) {
+      Register Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
+                               Register());
+      assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
+    }
+  };
+  loadRegFromStackSlot(RVVCSI);
+  loadRegFromStackSlot(UnmanagedCSI);
 
   RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
   if (RVFI->isPushable(*MF)) {
Index: llvm/lib/Target/RISCV/RISCVCallingConv.td
===================================================================
--- llvm/lib/Target/RISCV/RISCVCallingConv.td
+++ llvm/lib/Target/RISCV/RISCVCallingConv.td
@@ -24,6 +24,17 @@
     : CalleeSavedRegs<(add CSR_ILP32_LP64,
                        F8_D, F9_D, (sequence "F%u_D", 18, 27))>;
 
+defvar CSR_V = (add (sequence "V%u", 1, 7), (sequence "V%u", 24, 31));
+
+def CSR_ILP32_LP64_V
+    : CalleeSavedRegs<(add CSR_ILP32_LP64, CSR_V)>;
+
+def CSR_ILP32F_LP64F_V
+    : CalleeSavedRegs<(add CSR_ILP32F_LP64F, CSR_V)>;
+
+def CSR_ILP32D_LP64D_V
+    : CalleeSavedRegs<(add CSR_ILP32D_LP64D, CSR_V)>;
+
 // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask()
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
 
Index: llvm/lib/IR/AsmWriter.cpp
===================================================================
--- llvm/lib/IR/AsmWriter.cpp
+++ llvm/lib/IR/AsmWriter.cpp
@@ -350,6 +350,7 @@
     break;
   case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
   case CallingConv::AMDGPU_Gfx:    Out << "amdgpu_gfx"; break;
+  case CallingConv::RISCV_VectorCall: Out << "riscv_vector_cc"; break;
   }
 }
 
Index: llvm/lib/AsmParser/LLParser.cpp
===================================================================
--- llvm/lib/AsmParser/LLParser.cpp
+++ llvm/lib/AsmParser/LLParser.cpp
@@ -2031,6 +2031,7 @@
 ///   ::= 'amdgpu_cs_chain_preserve'
 ///   ::= 'amdgpu_kernel'
 ///   ::= 'tailcc'
+///   ::= 'riscv_vector_cc'
 ///   ::= 'cc' UINT
 ///
 bool LLParser::parseOptionalCallingConv(unsigned &CC) {
@@ -2099,6 +2100,7 @@
     break;
   case lltok::kw_amdgpu_kernel:  CC = CallingConv::AMDGPU_KERNEL; break;
   case lltok::kw_tailcc:         CC = CallingConv::Tail; break;
+  case lltok::kw_riscv_vector_cc:CC = CallingConv::RISCV_VectorCall; break;
   case lltok::kw_cc: {
       Lex.Lex();
       return parseUInt32(CC);
Index: llvm/lib/AsmParser/LLLexer.cpp
===================================================================
--- llvm/lib/AsmParser/LLLexer.cpp
+++ llvm/lib/AsmParser/LLLexer.cpp
@@ -632,6 +632,7 @@
   KEYWORD(amdgpu_kernel);
   KEYWORD(amdgpu_gfx);
   KEYWORD(tailcc);
+  KEYWORD(riscv_vector_cc);
 
   KEYWORD(cc);
   KEYWORD(c);
Index: llvm/include/llvm/IR/CallingConv.h
===================================================================
--- llvm/include/llvm/IR/CallingConv.h
+++ llvm/include/llvm/IR/CallingConv.h
@@ -245,6 +245,9 @@
     /// placement. Preserves active lane values for input VGPRs.
     AMDGPU_CS_ChainPreserve = 105,
 
+    /// Calling convention used for RISC-V V-extension.
+    RISCV_VectorCall = 106,
+
     /// The highest possible ID. Must be some 2^k - 1.
     MaxID = 1023
   };
Index: llvm/include/llvm/AsmParser/LLToken.h
===================================================================
--- llvm/include/llvm/AsmParser/LLToken.h
+++ llvm/include/llvm/AsmParser/LLToken.h
@@ -175,6 +175,7 @@
   kw_amdgpu_kernel,
   kw_amdgpu_gfx,
   kw_tailcc,
+  kw_riscv_vector_cc,
 
   // Attributes:
   kw_attributes,
Index: clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGen/RISCV/riscv-vector-callingconv.cpp
@@ -0,0 +1,19 @@
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 %s -triple riscv64 -target-feature +v -verify
+
+__attribute__((riscv_vector_cc)) int var; // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'int'}}
+
+__attribute__((riscv_vector_cc)) void func();
+__attribute__((riscv_vector_cc(1))) void func_invalid(); // expected-error {{'riscv_vector_cc' attribute takes no arguments}}
+
+void test_no_attribute(int); // expected-note {{previous declaration is here}}
+void __attribute__((riscv_vector_cc)) test_no_attribute(int x) { } // expected-error {{function declared 'riscv_vector_cc' here was previously declared without calling convention}}
+
+class test_cc {
+  __attribute__((riscv_vector_cc)) void member_func();
+};
+
+void test_lambda() {
+  __attribute__((riscv_vector_cc)) auto lambda = []() { // expected-warning {{'riscv_vector_cc' only applies to function types; type here is 'auto'}}
+  };
+}
Index: clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c
@@ -0,0 +1,27 @@
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v \
+// RUN:   -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-LLVM %s
+
+#include <riscv_vector.h>
+
+// CHECK-LLVM: call riscv_vector_cc <vscale x 2 x i32> @bar
+vint32m1_t __attribute__((riscv_vector_cc)) bar(vint32m1_t input);
+vint32m1_t test_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) {
+  vint32m1_t ret;
+  vint32m1_t val;
+  val = __riscv_vle32_v_i32m1(base, vl);
+  ret = bar(input);
+  __riscv_vse32_v_i32m1(base, val, vl);
+  return ret;
+}
+
+// CHECK-LLVM: call <vscale x 2 x i32> @baz
+vint32m1_t baz(vint32m1_t input);
+vint32m1_t test_no_vector_cc_attr(vint32m1_t input, int32_t *base, size_t vl) {
+  vint32m1_t ret;
+  vint32m1_t val;
+  val = __riscv_vle32_v_i32m1(base, vl);
+  ret = baz(input);
+  __riscv_vse32_v_i32m1(base, val, vl);
+  return ret;
+}
Index: clang/lib/Sema/SemaType.cpp
===================================================================
--- clang/lib/Sema/SemaType.cpp
+++ clang/lib/Sema/SemaType.cpp
@@ -135,7 +135,8 @@
   case ParsedAttr::AT_Pcs:                                                     \
   case ParsedAttr::AT_IntelOclBicc:                                            \
   case ParsedAttr::AT_PreserveMost:                                            \
-  case ParsedAttr::AT_PreserveAll
+  case ParsedAttr::AT_PreserveAll:                                             \
+  case ParsedAttr::AT_RISCVVectorCC
 
 // Function type attributes.
 #define FUNCTION_TYPE_ATTRS_CASELIST                                           \
@@ -7768,6 +7769,8 @@
     return createSimpleAttr<PreserveMostAttr>(Ctx, Attr);
   case ParsedAttr::AT_PreserveAll:
     return createSimpleAttr<PreserveAllAttr>(Ctx, Attr);
+  case ParsedAttr::AT_RISCVVectorCC:
+    return createSimpleAttr<RISCVVectorCCAttr>(Ctx, Attr);
   }
   llvm_unreachable("unexpected attribute kind!");
 }
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -5194,6 +5194,9 @@
   case ParsedAttr::AT_PreserveAll:
     D->addAttr(::new (S.Context) PreserveAllAttr(S.Context, AL));
     return;
+  case ParsedAttr::AT_RISCVVectorCC:
+    D->addAttr(::new (S.Context) RISCVVectorCCAttr(S.Context, AL));
+    return;
   default:
     llvm_unreachable("unexpected attribute kind");
   }
@@ -5393,6 +5396,9 @@
   case ParsedAttr::AT_PreserveAll:
     CC = CC_PreserveAll;
     break;
+  case ParsedAttr::AT_RISCVVectorCC:
+    CC = CC_RISCVVectorCall;
+    break;
   default: llvm_unreachable("unexpected attribute kind");
   }
 
@@ -9229,6 +9235,7 @@
   case ParsedAttr::AT_AArch64VectorPcs:
   case ParsedAttr::AT_AArch64SVEPcs:
   case ParsedAttr::AT_AMDGPUKernelCall:
+  case ParsedAttr::AT_RISCVVectorCC:
     handleCallConvAttr(S, D, AL);
     break;
   case ParsedAttr::AT_Suppress:
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -71,6 +71,7 @@
   case CC_PreserveAll: return llvm::CallingConv::PreserveAll;
   case CC_Swift: return llvm::CallingConv::Swift;
   case CC_SwiftAsync: return llvm::CallingConv::SwiftTail;
+  case CC_RISCVVectorCall: return llvm::CallingConv::RISCV_VectorCall;
   }
 }
 
@@ -252,6 +253,9 @@
   if (D->hasAttr<PreserveAllAttr>())
     return CC_PreserveAll;
 
+  if (D->hasAttr<RISCVVectorCCAttr>())
+    return CC_RISCVVectorCall;
+
   return CC_C;
 }
 
Index: clang/lib/Basic/Targets/RISCV.h
===================================================================
--- clang/lib/Basic/Targets/RISCV.h
+++ clang/lib/Basic/Targets/RISCV.h
@@ -101,6 +101,8 @@
 
   bool hasBitIntType() const override { return true; }
 
+  CallingConvCheckResult checkCallingConvention(CallingConv CC) const override;
+
   bool useFP16ConversionIntrinsics() const override {
     return false;
   }
Index: clang/lib/Basic/Targets/RISCV.cpp
===================================================================
--- clang/lib/Basic/Targets/RISCV.cpp
+++ clang/lib/Basic/Targets/RISCV.cpp
@@ -346,3 +346,14 @@
   bool Is64Bit = getTriple().isArch64Bit();
   llvm::RISCV::fillValidTuneCPUArchList(Values, Is64Bit);
 }
+
+TargetInfo::CallingConvCheckResult
+RISCVTargetInfo::checkCallingConvention(CallingConv CC) const {
+  switch (CC) {
+    default:
+      return CCCR_Warning;
+    case CC_C:
+    case CC_RISCVVectorCall:
+      return CCCR_OK;
+  }
+}
Index: clang/lib/AST/TypePrinter.cpp
===================================================================
--- clang/lib/AST/TypePrinter.cpp
+++ clang/lib/AST/TypePrinter.cpp
@@ -1035,6 +1035,9 @@
     case CC_PreserveAll:
       OS << " __attribute__((preserve_all))";
       break;
+    case CC_RISCVVectorCall:
+      OS << "__attribute__((riscv_vector_cc))";
+      break;
     }
   }
 
@@ -1855,6 +1858,9 @@
   case attr::PreserveAll:
     OS << "preserve_all";
     break;
+  case attr::RISCVVectorCC:
+    OS << "riscv_vector_cc";
+    break;
   case attr::NoDeref:
     OS << "noderef";
     break;
Index: clang/lib/AST/Type.cpp
===================================================================
--- clang/lib/AST/Type.cpp
+++ clang/lib/AST/Type.cpp
@@ -3353,6 +3353,7 @@
   case CC_SwiftAsync: return "swiftasynccall";
   case CC_PreserveMost: return "preserve_most";
   case CC_PreserveAll: return "preserve_all";
+  case CC_RISCVVectorCall: return "riscv_vector_cc";
   }
 
   llvm_unreachable("Invalid calling convention.");
@@ -3821,6 +3822,7 @@
   case attr::IntelOclBicc:
   case attr::PreserveMost:
   case attr::PreserveAll:
+  case attr::RISCVVectorCC:
     return true;
   }
   llvm_unreachable("invalid attr kind");
Index: clang/lib/AST/ItaniumMangle.cpp
===================================================================
--- clang/lib/AST/ItaniumMangle.cpp
+++ clang/lib/AST/ItaniumMangle.cpp
@@ -3268,6 +3268,7 @@
   case CC_OpenCLKernel:
   case CC_PreserveMost:
   case CC_PreserveAll:
+  case CC_RISCVVectorCall:
     // FIXME: we should be mangling all of the above.
     return "";
 
Index: clang/include/clang/Basic/Specifiers.h
===================================================================
--- clang/include/clang/Basic/Specifiers.h
+++ clang/include/clang/Basic/Specifiers.h
@@ -288,6 +288,7 @@
     CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs))
     CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs))
     CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel))
+    CC_RISCVVectorCall, // __attribute__((riscv_vector_cc))
   };
 
   /// Checks whether the given calling convention supports variadic
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -5193,6 +5193,16 @@
   }];
 }
 
+def RISCVVectorCCDocs : Documentation {
+ let Category = DocCatCallingConvs;
+ let Content = [{
+The ``riscv_vector_cc`` attribute can be applied to a function. It preserves 15
+registers namely, v1-v7 and v24-v31 as callee-saved. Callers thus don't need
+to save these registers before function calls, and callees only need to save
+them only if they use them.
+ }];
+}
+
 def PreferredNameDocs : Documentation {
   let Category = DocCatDecl;
   let Content = [{
Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -2748,6 +2748,11 @@
   let Documentation = [PreserveAllDocs];
 }
 
+def RISCVVectorCC: DeclOrTypeAttr {
+ let Spellings = [Clang<"riscv_vector_cc">];
+ let Documentation = [RISCVVectorCCDocs];
+}
+
 def Target : InheritableAttr {
   let Spellings = [GCC<"target">];
   let Args = [StringArgument<"featuresStr">];
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to