llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) <details> <summary>Changes</summary> Lower local allocations of CUDA device, managed, and unified variables to fir.cuda_alloc. Add fir.cuda_free in the function context finalization. --- Full diff: https://github.com/llvm/llvm-project/pull/90526.diff 6 Files Affected: - (modified) flang/include/flang/Optimizer/Builder/FIRBuilder.h (+7) - (modified) flang/include/flang/Semantics/tools.h (+17) - (modified) flang/lib/Lower/ConvertVariable.cpp (+29) - (modified) flang/lib/Optimizer/Builder/FIRBuilder.cpp (+14-11) - (modified) flang/lib/Optimizer/Dialect/FIROps.cpp (+15) - (modified) flang/test/Lower/CUDA/cuda-data-attribute.cuf (+25) ``````````diff diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index e4c954159f71be..0d650f830b64e0 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -708,6 +708,13 @@ mlir::Value createNullBoxProc(fir::FirOpBuilder &builder, mlir::Location loc, /// Set internal linkage attribute on a function. 
void setInternalLinkage(mlir::func::FuncOp); + +llvm::SmallVector<mlir::Value> +elideExtentsAlreadyInType(mlir::Type type, mlir::ValueRange shape); + +llvm::SmallVector<mlir::Value> +elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams); + } // namespace fir::factory #endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index da10969ebc7021..c9eb5bc857ac01 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -222,6 +222,23 @@ inline bool HasCUDAAttr(const Symbol &sym) { return false; } +inline bool NeedCUDAAlloc(const Symbol &sym) { + bool inDeviceSubprogram{IsCUDADeviceContext(&sym.owner())}; + if (const auto *details{ + sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()}) { + if (details->cudaDataAttr() && + (*details->cudaDataAttr() == common::CUDADataAttr::Device || + *details->cudaDataAttr() == common::CUDADataAttr::Managed || + *details->cudaDataAttr() == common::CUDADataAttr::Unified)) { + // Descriptor is allocated on host when in host context. 
+ if (Fortran::semantics::IsAllocatable(sym)) + return inDeviceSubprogram; + return true; + } + } + return false; +} + const Scope *FindCUDADeviceContext(const Scope *); std::optional<common::CUDADataAttr> GetCUDADataAttr(const Symbol *); diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 21db0cac11bf6a..9a17acf5b15c36 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -693,6 +693,22 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter, if (ultimateSymbol.test(Fortran::semantics::Symbol::Flag::CrayPointee)) return builder.create<fir::ZeroOp>(loc, fir::ReferenceType::get(ty)); + if (Fortran::semantics::NeedCUDAAlloc(ultimateSymbol)) { + fir::CUDADataAttributeAttr cudaAttr = + Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(), + ultimateSymbol); + llvm::SmallVector<mlir::Value> indices; + llvm::SmallVector<mlir::Value> elidedShape = + fir::factory::elideExtentsAlreadyInType(ty, shape); + llvm::SmallVector<mlir::Value> elidedLenParams = + fir::factory::elideLengthsAlreadyInType(ty, lenParams); + auto idxTy = builder.getIndexType(); + for (mlir::Value sh : elidedShape) + indices.push_back(builder.createConvert(loc, idxTy, sh)); + return builder.create<fir::CUDAAllocOp>(loc, ty, nm, symNm, cudaAttr, + lenParams, indices); + } + // Let the builder do all the heavy lifting. 
if (!Fortran::semantics::IsProcedurePointer(ultimateSymbol)) return builder.allocateLocal(loc, ty, nm, symNm, shape, lenParams, isTarg); @@ -927,6 +943,19 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter, }); } } + if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) { + auto *builder = &converter.getFirOpBuilder(); + mlir::Location loc = converter.getCurrentLocation(); + fir::ExtendedValue exv = + converter.getSymbolExtendedValue(var.getSymbol(), &symMap); + auto *sym = &var.getSymbol(); + converter.getFctCtx().attachCleanup([builder, loc, exv, sym]() { + fir::CUDADataAttributeAttr cudaAttr = + Fortran::lower::translateSymbolCUDADataAttribute( + builder->getContext(), *sym); + builder->create<fir::CUDAFreeOp>(loc, fir::getBase(exv), cudaAttr); + }); + } } //===----------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index a0fbae5b614cc7..a813b646087d73 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -176,8 +176,9 @@ mlir::Value fir::FirOpBuilder::createRealConstant(mlir::Location loc, llvm_unreachable("should use builtin floating-point type"); } -static llvm::SmallVector<mlir::Value> -elideExtentsAlreadyInType(mlir::Type type, mlir::ValueRange shape) { +llvm::SmallVector<mlir::Value> +fir::factory::elideExtentsAlreadyInType(mlir::Type type, + mlir::ValueRange shape) { auto arrTy = type.dyn_cast<fir::SequenceType>(); if (shape.empty() || !arrTy) return {}; @@ -191,8 +192,9 @@ elideExtentsAlreadyInType(mlir::Type type, mlir::ValueRange shape) { return dynamicShape; } -static llvm::SmallVector<mlir::Value> -elideLengthsAlreadyInType(mlir::Type type, mlir::ValueRange lenParams) { +llvm::SmallVector<mlir::Value> +fir::factory::elideLengthsAlreadyInType(mlir::Type type, + mlir::ValueRange lenParams) { if (lenParams.empty()) return {}; if (auto arrTy = 
type.dyn_cast<fir::SequenceType>()) @@ -211,9 +213,9 @@ mlir::Value fir::FirOpBuilder::allocateLocal( // Convert the shape extents to `index`, as needed. llvm::SmallVector<mlir::Value> indices; llvm::SmallVector<mlir::Value> elidedShape = - elideExtentsAlreadyInType(ty, shape); + fir::factory::elideExtentsAlreadyInType(ty, shape); llvm::SmallVector<mlir::Value> elidedLenParams = - elideLengthsAlreadyInType(ty, lenParams); + fir::factory::elideLengthsAlreadyInType(ty, lenParams); auto idxTy = getIndexType(); for (mlir::Value sh : elidedShape) indices.push_back(createConvert(loc, idxTy, sh)); @@ -283,9 +285,9 @@ fir::FirOpBuilder::createTemporary(mlir::Location loc, mlir::Type type, mlir::ValueRange lenParams, llvm::ArrayRef<mlir::NamedAttribute> attrs) { llvm::SmallVector<mlir::Value> dynamicShape = - elideExtentsAlreadyInType(type, shape); + fir::factory::elideExtentsAlreadyInType(type, shape); llvm::SmallVector<mlir::Value> dynamicLength = - elideLengthsAlreadyInType(type, lenParams); + fir::factory::elideLengthsAlreadyInType(type, lenParams); InsertPoint insPt; const bool hoistAlloc = dynamicShape.empty() && dynamicLength.empty(); if (hoistAlloc) { @@ -306,9 +308,9 @@ mlir::Value fir::FirOpBuilder::createHeapTemporary( mlir::ValueRange shape, mlir::ValueRange lenParams, llvm::ArrayRef<mlir::NamedAttribute> attrs) { llvm::SmallVector<mlir::Value> dynamicShape = - elideExtentsAlreadyInType(type, shape); + fir::factory::elideExtentsAlreadyInType(type, shape); llvm::SmallVector<mlir::Value> dynamicLength = - elideLengthsAlreadyInType(type, lenParams); + fir::factory::elideLengthsAlreadyInType(type, lenParams); assert(!type.isa<fir::ReferenceType>() && "cannot be a reference"); return create<fir::AllocMemOp>(loc, type, /*unique_name=*/llvm::StringRef{}, @@ -659,7 +661,8 @@ mlir::Value fir::FirOpBuilder::createBox(mlir::Location loc, mlir::Type boxType, mlir::Type valueOrSequenceType = fir::unwrapPassByRefType(boxType); return create<fir::EmboxOp>( loc, boxType, addr, 
shape, slice, - elideLengthsAlreadyInType(valueOrSequenceType, lengths), tdesc); + fir::factory::elideLengthsAlreadyInType(valueOrSequenceType, lengths), + tdesc); } void fir::FirOpBuilder::dumpFunc() { getFunction().dump(); } diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 24af94f9b90a1d..cbdc6a73dd34d1 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -4031,6 +4031,21 @@ mlir::LogicalResult fir::CUDADeallocateOp::verify() { return mlir::success(); } +void fir::CUDAAllocOp::build( + mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Type inType, + llvm::StringRef uniqName, llvm::StringRef bindcName, + fir::CUDADataAttributeAttr cudaAttr, mlir::ValueRange typeparams, + mlir::ValueRange shape, llvm::ArrayRef<mlir::NamedAttribute> attributes) { + mlir::StringAttr nameAttr = + uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName); + mlir::StringAttr bindcAttr = + bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName); + build(builder, result, wrapAllocaResultType(inType), + mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape, + cudaAttr); + result.addAttributes(attributes); +} + //===----------------------------------------------------------------------===// // FIROpsDialect //===----------------------------------------------------------------------===// diff --git a/flang/test/Lower/CUDA/cuda-data-attribute.cuf b/flang/test/Lower/CUDA/cuda-data-attribute.cuf index 937c981bddd368..083a3cacc02062 100644 --- a/flang/test/Lower/CUDA/cuda-data-attribute.cuf +++ b/flang/test/Lower/CUDA/cuda-data-attribute.cuf @@ -62,4 +62,29 @@ end subroutine ! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "du", fir.cuda_attr = #fir.cuda<unified>}) ! 
CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>) +subroutine cuda_alloc_free(n) + integer :: n + real, device :: a(10) + integer, unified :: u + real, managed :: b(n) +end + +! CHECK-LABEL: func.func @_QMcuda_varPcuda_alloc_free +! CHECK: %[[ALLOC_A:.*]] = fir.cuda_alloc !fir.array<10xf32> {bindc_name = "a", cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref<!fir.array<10xf32>> +! CHECK: %[[SHAPE:.*]] = fir.shape %c10 : (index) -> !fir.shape<1> +! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) + +! CHECK: %[[ALLOC_U:.*]] = fir.cuda_alloc i32 {bindc_name = "u", cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref<i32> +! CHECK: %[[DECL_U:.*]]:2 = hlfir.declare %[[ALLOC_U]] {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + +! CHECK: %[[ALLOC_B:.*]] = fir.cuda_alloc !fir.array<?xf32>, %{{.*}} : index {bindc_name = "b", cuda_attr = #fir.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} -> !fir.ref<!fir.array<?xf32>> +! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1> +! CHECK: %[[DECL_B:.*]]:2 = hlfir.declare %[[ALLOC_B]](%[[SHAPE]]) {cuda_attr = #fir.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) + +! CHECK: fir.cuda_free %[[DECL_B]]#1 : !fir.ref<!fir.array<?xf32>> {cuda_attr = #fir.cuda<managed>} +! CHECK: fir.cuda_free %[[DECL_U]]#1 : !fir.ref<i32> {cuda_attr = #fir.cuda<unified>} +! 
CHECK: fir.cuda_free %[[DECL_A]]#1 : !fir.ref<!fir.array<10xf32>> {cuda_attr = #fir.cuda<device>} + end module + + `````````` </details> https://github.com/llvm/llvm-project/pull/90526 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits