llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Greg Rodgers (gregrodgers)

<details>
<summary>Changes</summary>

This is the initial support for Emissary APIs as discussed here:

https://discourse.llvm.org/t/emissary-apis-a-general-purpose-framework-for-gpu-initiated-host-execution-of-native-host-apis/89169

Currently, the functions printf, fprintf, MPI_Send, and MPI_Recv, as well as
much of the Fortran IO Runtime, are working. The latter provides the ability to
use Fortran print and write statements in your target region.

The device libc already provides printf and fprintf support, which uses the
same offload RPC infrastructure that Emissary uses. To disable Emissary printf
and fprintf, set -fno-use-emissary-print. The use-emissary-print option is
enabled by default because Emissary printing is much faster.

---

Patch is 105.01 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/175265.diff


23 Files Affected:

- (modified) clang/include/clang/Basic/LangOptions.def (+1) 
- (modified) clang/include/clang/Options/Options.td (+6) 
- (added) clang/lib/CodeGen/CGEmitEmissaryExec.cpp (+371) 
- (modified) clang/lib/CodeGen/CGExpr.cpp (+11) 
- (modified) clang/lib/CodeGen/CMakeLists.txt (+1) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+1) 
- (modified) clang/lib/Driver/ToolChains/Clang.cpp (+7) 
- (modified) clang/lib/Headers/CMakeLists.txt (+2) 
- (added) clang/lib/Headers/EmissaryIds.h (+97) 
- (added) clang/lib/Headers/EmissaryMPI.h (+202) 
- (modified) clang/lib/Headers/llvm_libc_wrappers/stdio.h (+17) 
- (modified) offload/include/Shared/RPCOpcodes.h (+2) 
- (modified) offload/libomptarget/CMakeLists.txt (+1) 
- (modified) offload/plugins-nextgen/common/CMakeLists.txt (+15) 
- (added) offload/plugins-nextgen/common/include/Emissary.h (+263) 
- (added) offload/plugins-nextgen/common/src/Emissary.cpp (+259) 
- (added) offload/plugins-nextgen/common/src/EmissaryFortrt.cpp (+470) 
- (added) offload/plugins-nextgen/common/src/EmissaryPrint.cpp (+423) 
- (modified) offload/plugins-nextgen/common/src/RPC.cpp (+152) 
- (modified) openmp/device/CMakeLists.txt (+10) 
- (added) openmp/device/src/EmissaryFortrt.cpp (+144) 
- (added) openmp/device/src/EmissaryPrint.cpp (+79) 
- (modified) openmp/device/src/Misc.cpp (+62) 


``````````diff
diff --git a/clang/include/clang/Basic/LangOptions.def 
b/clang/include/clang/Basic/LangOptions.def
index 8cba1dbaee24e..69ff43cef7307 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -227,6 +227,7 @@ LANGOPT(OpenMPExtensions  , 1, 1, NotCompatible, "Enable 
all Clang extensions fo
 LANGOPT(OpenMPSimd        , 1, 0, NotCompatible, "Use SIMD only OpenMP 
support.")
 LANGOPT(OpenMPUseTLS      , 1, 0, NotCompatible, "Use TLS for threadprivates 
or runtime calls")
 LANGOPT(OpenMPIsTargetDevice    , 1, 0, NotCompatible, "Generate code only for 
OpenMP target device")
+LANGOPT(UseEmissaryPrint  , 1, 0, NotCompatible, "Enables use of certain IO 
functions with Emissary rather than LIBC")
 LANGOPT(OpenMPCUDAMode    , 1, 0, NotCompatible, "Generate code for OpenMP 
pragmas in SIMT/SPMD mode")
 LANGOPT(OpenMPIRBuilder   , 1, 0, NotCompatible, "Use the experimental 
OpenMP-IR-Builder codegen path.")
 LANGOPT(OpenMPCUDANumSMs  , 32, 0, NotCompatible, "Number of SMs for CUDA 
devices.")
diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index 6a72931727a7c..8cf017d8effd7 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -8940,6 +8940,12 @@ def fopenmp_host_ir_file_path : Separate<["-"], 
"fopenmp-host-ir-file-path">,
 
 } // let Visibility = [CC1Option, FC1Option]
 
+defm use_emissary_print: BoolFOption<"use-emissary-print",
+  LangOpts<"UseEmissaryPrint">, DefaultTrue,
+  PosFlag<SetTrue, [], [ClangOption, CC1Option],
+  "Enable use of Emissary printf/fprint overriding device libc 
printf/fprintf">,
+  NegFlag<SetFalse>>;
+
 
//===----------------------------------------------------------------------===//
 // Coarray Options
 
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CGEmitEmissaryExec.cpp 
b/clang/lib/CodeGen/CGEmitEmissaryExec.cpp
new file mode 100644
index 0000000000000..cc574d1ffdcf2
--- /dev/null
+++ b/clang/lib/CodeGen/CGEmitEmissaryExec.cpp
@@ -0,0 +1,371 @@
+//===------- CGEmitEmissaryExec.cpp - Codegen for _emissary_exec 
--==------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// EmitEmissaryExec:
+//
+// When a device call to the varadic function _emissary_exec is encountered
+// (in CGExpr.cpp) EmitEmissaryExec does these steps:
+//
+// 1. If string lens are runtime dependent, Emit code to determine runtime len.
+// 2. Emits call to allocate memory __llvm_emissary_premalloc,
+// 3. Emit stores of each arg into arg buffer,
+// 4. Emits call to function __llvm_emissary_rpc or __llvm_emissary_rpc_dm
+//
+// The arg buffer is a struct that contains the length, number of args, an
+// array of 4-byte keys that represent the type of of each arg, an array of
+// aligned "data" values for each arg, and finally the runtime string values.
+// If an arg is a string the data value is the runtime length of the string.
+// Each 4-byte key contains the llvm type ID and the number of bits for the
+// type. encoded by the macro _PACK_TY_BITLEN(x,y) ((uint32_t)x << 16) |
+// ((uint32_t)y)
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../../clang/lib/Headers/EmissaryIds.h"
+#include "CodeGenFunction.h"
+#include "clang/Basic/Builtins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+// These static helper functions support EmitEmissaryExec.
+static llvm::Function *GetOmpStrlenDeclaration(CodeGenModule &CGM) {
+  auto &M = CGM.getModule();
+  // Args are pointer to char and maxstringlen
+  llvm::Type *ArgTypes[] = {CGM.Int8PtrTy, CGM.Int32Ty};
+  llvm::FunctionType *OmpStrlenFTy =
+      llvm::FunctionType::get(CGM.Int32Ty, ArgTypes, false);
+  if (auto *F = M.getFunction("__strlen_max")) {
+    assert(F->getFunctionType() == OmpStrlenFTy);
+    return F;
+  }
+  llvm::Function *FN = llvm::Function::Create(
+      OmpStrlenFTy, llvm::GlobalVariable::ExternalLinkage, "__strlen_max", &M);
+  return FN;
+}
+
+// Deterimines if an expression is a string with variable lenth
+static bool isVarString(const clang::Expr *argX, const clang::Type *argXTy,
+                        const llvm::Value *Arg) {
+  if ((argXTy->isPointerType() || argXTy->isConstantArrayType()) &&
+      argXTy->getPointeeOrArrayElementType()->isCharType() && 
!argX->isLValue())
+    return true;
+  // Ensure the VarDecl has an inititalizer
+  if (const auto *DRE = dyn_cast<DeclRefExpr>(argX))
+    if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+      if (!VD->getInit() ||
+          !llvm::isa<StringLiteral>(VD->getInit()->IgnoreImplicit()))
+        return true;
+  return false;
+}
+
+// Deterimines if an argument is a string
+static bool isString(const clang::Type *argXTy) {
+  if ((argXTy->isPointerType() || argXTy->isConstantArrayType()) &&
+      argXTy->getPointeeOrArrayElementType()->isCharType())
+    return true;
+  else
+    return false;
+}
+
+// Gets a string literal to write into the transfer buffer
+static const StringLiteral *getSL(const clang::Expr *argX,
+                                  const clang::Type *argXTy) {
+  // String in argX has known constant length
+  if (!argXTy->isConstantArrayType()) {
+    // Allow constant string to be a declared variable,
+    // But it must be constant and initialized.
+    const DeclRefExpr *DRE = cast<DeclRefExpr>(argX);
+    const VarDecl *VarD = cast<VarDecl>(DRE->getDecl());
+    argX = VarD->getInit()->IgnoreImplicit();
+  }
+  const StringLiteral *SL = cast<StringLiteral>(argX);
+  return SL;
+}
+
+// Returns a function pointer to __llvm_emissary_premalloc
+static llvm::Function *GetEmissaryAllocDeclaration(CodeGenModule &CGM) {
+  auto &M = CGM.getModule();
+  // clang::CodeGen::CodeGenTypes &CGT = CGM.getTypes();
+  const char *_executeName = "__llvm_emissary_premalloc";
+  llvm::Type *ArgTypes[] = {CGM.Int32Ty};
+  llvm::Function *FN;
+  // Maybe this should be pointer to char instead of pointer to void
+  llvm::FunctionType *VargsFnAllocFuncType = llvm::FunctionType::get(
+      CGM.getTypes().ConvertType(
+          CGM.getContext().getPointerType(CGM.getContext().VoidTy)),
+      ArgTypes, false);
+  if (!(FN = M.getFunction(_executeName)))
+    FN = llvm::Function::Create(VargsFnAllocFuncType,
+                                llvm::GlobalVariable::ExternalLinkage,
+                                _executeName, &M);
+  assert(FN->getFunctionType() == VargsFnAllocFuncType);
+  return FN;
+}
+
+// Returns a function pointer to __llvm_emissary_rpc
+static llvm::Function *GetEmissaryExecDeclaration(CodeGenModule &CGM,
+                                                  bool hasXfers) {
+  const char *_executeName =
+      hasXfers ? "__llvm_emissary_rpc_dm" : "__llvm_emissary_rpc";
+  auto &M = CGM.getModule();
+  llvm::Type *ArgTypes[] = {
+      CGM.Int32Ty, CGM.getTypes().ConvertType(CGM.getContext().getPointerType(
+                       CGM.getContext().VoidTy))};
+  llvm::Function *FN;
+  llvm::FunctionType *VarfnFuncType =
+      llvm::FunctionType::get(CGM.Int64Ty, ArgTypes, false);
+  if (!(FN = M.getFunction(_executeName)))
+    FN = llvm::Function::Create(
+        VarfnFuncType, llvm::GlobalVariable::ExternalLinkage, _executeName, 
&M);
+  assert(FN->getFunctionType() == VarfnFuncType);
+  return FN;
+}
+
+// A macro to pack the llvm type ID and numbits into 4-byte key
+#define _PACK_TY_BITLEN(x, y) ((uint32_t)x << 16) | ((uint32_t)y)
+
+//  ----- External function EmitEmissaryExec called from CGExpr.cpp -----
+RValue CodeGenFunction::EmitEmissaryExec(const CallExpr *E) {
+  assert(getTarget().getTriple().isAMDGCN() ||
+         getTarget().getTriple().isNVPTX());
+  assert(E->getNumArgs() >= 1); // _emissary_exec always has at least one arg.
+  const llvm::DataLayout &DL = CGM.getDataLayout();
+  CallArgList Args;
+
+  EmitCallArgs(Args,
+               E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
+               E->arguments(), E->getDirectCallee(),
+               /* ParamsToSkip = */ 0);
+
+  // We don't know how to emit non-scalar varargs.
+  if (std::any_of(Args.begin() + 1, Args.end(), [&](const CallArg &A) {
+        return !A.getRValue(*this).isScalar();
+      })) {
+    CGM.ErrorUnsupported(E, "non-scalar arg in GPU vargs function");
+    return RValue::get(llvm::ConstantInt::get(IntTy, 0));
+  }
+  // NumArgs always includes emisid, but E->getNumArgs() could be 1 less if
+  // inserted it above.
+  unsigned NumArgs = (unsigned)Args.size();
+  llvm::SmallVector<llvm::Type *, 32> ArgTypes;
+  llvm::SmallVector<llvm::Value *, 32> VarStrLengths;
+  llvm::Value *TotalVarStrsLength = llvm::ConstantInt::get(Int32Ty, 0);
+  bool hasVarStrings = false;
+  ArgTypes.push_back(Int32Ty); // 1st field in struct is total DataLen
+  ArgTypes.push_back(Int32Ty); // 2nd field in struct will be num args
+  // An array of 4-byte keys that describe the arg type
+  for (unsigned I = 0; I < NumArgs; ++I)
+    ArgTypes.push_back(Int32Ty);
+
+  // Track the size of the numeric data length and string length
+  unsigned DataLen_CT =
+      (unsigned)(DL.getTypeAllocSize(Int32Ty)) * (NumArgs + 2);
+  unsigned AllStringsLen_CT = 0;
+
+  // ---  1st Pass over Args to create ArgTypes and count size ---
+  size_t structOffset = 4 * (NumArgs + 2);
+  for (unsigned I = 0; I < NumArgs; I++) {
+    llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal();
+    llvm::Type *ArgType = Arg->getType();
+    // Skip string processing on arg0 which may not be in E->getArg(0)
+    if (I != 0) {
+      const Expr *argX = E->getArg(I)->IgnoreParenCasts();
+      auto *argXTy = argX->getType().getTypePtr();
+      if (isString(argXTy)) {
+        if (isVarString(argX, argXTy, Arg)) {
+          hasVarStrings = true;
+          if (auto *PtrTy = dyn_cast<llvm::PointerType>(ArgType))
+            if (PtrTy->getPointerAddressSpace()) {
+              Arg = Builder.CreateAddrSpaceCast(Arg, CGM.Int8PtrTy);
+              ArgType = Arg->getType();
+            }
+          llvm::Value *VarStrLen =
+              Builder.CreateCall(GetOmpStrlenDeclaration(CGM),
+                                 {Arg, llvm::ConstantInt::get(Int32Ty, 1024)});
+          VarStrLengths.push_back(VarStrLen);
+          TotalVarStrsLength = Builder.CreateAdd(TotalVarStrsLength, VarStrLen,
+                                                 "sum_of_var_strings_length");
+          ArgType = Int32Ty;
+        } else {
+          const StringLiteral *SL = getSL(argX, argXTy);
+          StringRef ArgString = SL->getString();
+          AllStringsLen_CT += ((int)ArgString.size() + 1);
+          // change ArgType from char ptr to int to contain string length
+          ArgType = Int32Ty;
+        }
+      } // end of processing string argument
+    } // End of skip 1st arg
+    // if ArgTypeSize is >4 bytes we need to insert dummy align
+    // values in the struct so all stores can be aligned .
+    // These dummy fields must be inserted before the arg.
+    //
+    // In the pass below where the stores are generated careful
+    // tracking of the index into the struct is necessary.
+    size_t needsPadding = (structOffset % 
(size_t)DL.getTypeAllocSize(ArgType));
+    if (needsPadding) {
+      DataLen_CT += (unsigned)needsPadding;
+      structOffset += needsPadding;
+      ArgTypes.push_back(Int32Ty); // could assert that needsPadding == 4 here
+    }
+
+    ArgTypes.push_back(ArgType);
+    DataLen_CT += ((int)DL.getTypeAllocSize(ArgType));
+    structOffset += (size_t)DL.getTypeAllocSize(ArgType);
+  }
+
+  // ---  Generate call to __llvm_emissary_premalloc to get data pointer
+  if (hasVarStrings)
+    TotalVarStrsLength = Builder.CreateAdd(
+        TotalVarStrsLength,
+        llvm::ConstantInt::get(Int32Ty, AllStringsLen_CT + DataLen_CT),
+        "total_buffer_size");
+  llvm::Value *BufferLen =
+      hasVarStrings
+          ? TotalVarStrsLength
+          : llvm::ConstantInt::get(Int32Ty, AllStringsLen_CT + DataLen_CT);
+  llvm::Value *DataStructPtr =
+      Builder.CreateCall(GetEmissaryAllocDeclaration(CGM), {BufferLen});
+
+  // --- Cast the generic return pointer to be a struct in device global memory
+  llvm::StructType *DataStructTy =
+      llvm::StructType::create(ArgTypes, "varfn_args_store");
+  unsigned AS = getContext().getTargetAddressSpace(LangAS::cuda_device);
+  llvm::Value *BufferPtr = Builder.CreatePointerCast(
+      DataStructPtr, llvm::PointerType::get(CGM.getLLVMContext(), AS),
+      "varfn_args_store_casted");
+  // ---  Header of struct contains length and NumArgs ---
+  llvm::Value *DataLenField = llvm::ConstantInt::get(Int32Ty, DataLen_CT);
+  llvm::Value *P = Builder.CreateStructGEP(DataStructTy, BufferPtr, 0);
+  Builder.CreateAlignedStore(DataLenField, P,
+                             DL.getPrefTypeAlign(DataLenField->getType()));
+  llvm::Value *NumArgsField = llvm::ConstantInt::get(Int32Ty, NumArgs);
+  P = Builder.CreateStructGEP(DataStructTy, BufferPtr, 1);
+  Builder.CreateAlignedStore(NumArgsField, P,
+                             DL.getPrefTypeAlign(NumArgsField->getType()));
+
+  // ---  2nd Pass: create array of 4-byte keys to describe each arg
+  for (unsigned I = 0; I < NumArgs; I++) {
+    llvm::Type *ty = Args[I].getRValue(*this).getScalarVal()->getType();
+    llvm::Type::TypeID argtypeid =
+        Args[I].getRValue(*this).getScalarVal()->getType()->getTypeID();
+
+    // Get type size in bits. Usually 64 or 32.
+    uint32_t numbits = 0;
+    if (I > 0 &&
+        isString(E->getArg(I)->IgnoreParenCasts()->getType().getTypePtr()))
+      // The llvm typeID for string is pointer.  Since pointer numbits is 0,
+      // we set numbits to 1 to distinguish pointer type ID as string pointer.
+      numbits = 1;
+    else
+      numbits = ty->getScalarSizeInBits();
+    // Create a key that combines llvm typeID and size
+    llvm::Value *Key =
+        llvm::ConstantInt::get(Int32Ty, _PACK_TY_BITLEN(argtypeid, numbits));
+    P = Builder.CreateStructGEP(DataStructTy, BufferPtr, I + 2);
+    Builder.CreateAlignedStore(Key, P, DL.getPrefTypeAlign(Key->getType()));
+  }
+
+  // ---  3rd Pass: Store data values for each arg ---
+  unsigned varstring_index = 0;
+  unsigned structIndex = 2 + NumArgs;
+  structOffset = 4 * structIndex;
+  bool hasXfers;
+  for (unsigned I = 0; I < NumArgs; I++) {
+    llvm::Value *Arg;
+    if (I == 0) {
+      Arg = Args[I].getKnownRValue().getScalarVal();
+      llvm::ConstantInt *CI = llvm::dyn_cast<llvm::ConstantInt>(Arg);
+      uint64_t uint64value = CI->getZExtValue();
+      uint32_t lower_32 = (uint32_t)(uint64value & 0xFFFFFFFF);
+      hasXfers = lower_32 ? true : false;
+    } else {
+      const Expr *argX = E->getArg(I)->IgnoreParenCasts();
+      auto *argXTy = argX->getType().getTypePtr();
+      if (isString(argXTy)) {
+        if (isVarString(argX, argXTy, Arg)) {
+          Arg = VarStrLengths[varstring_index];
+          varstring_index++;
+        } else {
+          const StringLiteral *SL = getSL(argX, argXTy);
+          StringRef ArgString = SL->getString();
+          int ArgStrLen = (int)ArgString.size() + 1;
+          // Change Arg from a char pointer to the integer string length
+          Arg = llvm::ConstantInt::get(Int32Ty, ArgStrLen);
+        }
+      } else {
+        Arg = Args[I].getKnownRValue().getScalarVal();
+      }
+    }
+    size_t structElementSize = (size_t)DL.getTypeAllocSize(Arg->getType());
+    size_t needsPadding = (structOffset % structElementSize);
+    if (needsPadding) {
+      // Skip over dummy fields in struct to align
+      structOffset += needsPadding; // should assert needsPadding == 4
+      structIndex++;
+    }
+    P = Builder.CreateStructGEP(DataStructTy, BufferPtr, structIndex);
+    Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType()));
+    structOffset += structElementSize;
+    structIndex++;
+  }
+
+  // ---  4th Pass: memcpy all strings after the data values ---
+  // bitcast the struct in device global memory as a char buffer
+  Address BufferPtrByteAddr =
+      Address(Builder.CreatePointerCast(
+                  BufferPtr, llvm::PointerType::get(CGM.getLLVMContext(), AS),
+                  "_casted"),
+              Int8Ty, CharUnits::fromQuantity(1));
+
+  // BufferPtrByteAddr is a pointer to where we want to write the next string
+  BufferPtrByteAddr = Builder.CreateConstInBoundsByteGEP(
+      BufferPtrByteAddr, CharUnits::fromQuantity(DataLen_CT));
+  varstring_index = 0;
+  // Skip string processing on arg0 which may not be in E->getArg(0)
+  for (unsigned I = 1; I < NumArgs; ++I) {
+    llvm::Value *Arg = Args[I].getKnownRValue().getScalarVal();
+    const Expr *argX = E->getArg(I)->IgnoreParenCasts();
+    auto *argXTy = argX->getType().getTypePtr();
+    if (isString(argXTy)) {
+      if (isVarString(argX, argXTy, Arg)) {
+        llvm::Value *varStrLength = VarStrLengths[varstring_index];
+        varstring_index++;
+        Address SrcAddr = Address(Arg, Int8Ty, CharUnits::fromQuantity(1));
+        Builder.CreateMemCpy(BufferPtrByteAddr, SrcAddr, varStrLength);
+        // update BufferPtrByteAddr for next string memcpy
+        llvm::Value *PtrAsInt = BufferPtrByteAddr.emitRawPointer(*this);
+        BufferPtrByteAddr =
+            Address(Builder.CreateGEP(Int8Ty, PtrAsInt,
+                                      ArrayRef<llvm::Value *>(varStrLength)),
+                    Int8Ty, CharUnits::fromQuantity(1));
+      } else {
+        const StringLiteral *SL = getSL(argX, argXTy);
+        StringRef ArgString = SL->getString();
+        int ArgStrLen = (int)ArgString.size() + 1;
+        Address SrcAddr = CGM.GetAddrOfConstantStringFromLiteral(SL);
+        Builder.CreateMemCpy(BufferPtrByteAddr, SrcAddr, ArgStrLen);
+        // update BufferPtrByteAddr for next memcpy
+        BufferPtrByteAddr = Builder.CreateConstInBoundsByteGEP(
+            BufferPtrByteAddr, CharUnits::fromQuantity(ArgStrLen));
+      }
+    }
+  }
+  // --- Generate call to __llvm_emissary_rpc and return RValue
+  llvm::Value *emis_rc = Builder.CreateCall(
+      GetEmissaryExecDeclaration(CGM, hasXfers), {BufferLen, DataStructPtr});
+  // truncate long long int to int for printf return value.
+  if ((E->getDirectCallee()->getNameAsString() == "fprintf") ||
+      (E->getDirectCallee()->getNameAsString() == "printf"))
+    emis_rc = Builder.CreateTrunc(emis_rc, CGM.Int32Ty, "emis_rc");
+  return RValue::get(emis_rc);
+}
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 6309c37788f0c..a4ee21d968b5e 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -6829,6 +6829,17 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
       StaticOperator = true;
   }
 
+  // Replace calls to _emissary_exec found in emissary device stubs with calls
+  // to either __llvm_emissary_rpc or __llvm_emissary_rpc_dm. Before the call
+  // EmitEmissaryExec generates code to allocate an arg buffer and to fill the
+  // arg buffer.
+  if ((CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()) && FnType &&
+      dyn_cast<FunctionProtoType>(FnType) &&
+      dyn_cast<FunctionProtoType>(FnType)->isVariadic() &&
+      (E->getDirectCallee()->getNameAsString() == "_emissary_exec") &&
+      CGM.getLangOpts().OpenMP)
+    return EmitEmissaryExec(E);
+
   auto Arguments = E->arguments();
   if (StaticOperator) {
     // If we're calling a static operator, we need to emit the object argument
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index dbbc35b372f42..f699cee7fea11 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -62,6 +62,7 @@ add_clang_library(clangCodeGen
   CGAtomic.cpp
   CGBlocks.cpp
   CGBuiltin.cpp
+  CGEmitEmissaryExec.cpp
   CGCUDANV.cpp
   CGCUDARuntime.cpp
   CGCXX.cpp
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 855e43631f436..3e962e5d0e34c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/Co...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/175265
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to