Issue 76577
Summary [MLIR][OpenMP][Libomptarget] `omp.target` access memory raise errors
Labels mlir
Assignees
Reporter EllisLambda
    The MLIR and LLVM toolchain was built with https://github.com/llvm/llvm-project/commit/8c6172b0ac2b254dec7d57326abfd666a7954a03. The MLIR code use `mlir-translate --mlir-to-llvmir| clang++ -c -x ir -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx1103` to generate static lib
```
module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.target = #omp.target<target_cpu = "gfx1103", target_features = "">} {
 llvm.func @llvm_omp_target_alloc_device(i64, i32) -> !llvm.ptr
  llvm.func @omp_get_default_device() -> i32
  llvm.func @_QQmain_omp_outline_1() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost)>} {
    %0 = llvm.mlir.zero : !llvm.ptr
    %1 = llvm.call @omp_get_default_device() : () -> i32
    %2 = llvm.getelementptr %0[67108864] : (!llvm.ptr) -> !llvm.ptr, f64
    %3 = llvm.ptrtoint %2 : !llvm.ptr to i64
    %4 = llvm.call @llvm_omp_target_alloc_device(%3, %1) : (i64, i32) -> !llvm.ptr
    %5 = llvm.call @llvm_omp_target_alloc_device(%3, %1) : (i64, i32) -> !llvm.ptr
    %6 = omp.map_info var_ptr(%4 : !llvm.ptr, f64) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr
    %7 = omp.map_info var_ptr(%5 : !llvm.ptr, f64) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr
    omp.target map_entries(%6 -> %arg0, %7 -> %arg1 : !llvm.ptr, !llvm.ptr) {
 ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
      %8 = llvm.mlir.constant(0 : index) : i64
      %9 = llvm.mlir.constant(1 : index) : i64
      %10 = llvm.mlir.constant(8192 : index) : i64
      omp.teams {
 omp.parallel {
          omp.wsloop for  (%arg2, %arg3, %arg4, %arg5) : i64 = (%8, %8, %8, %8) to (%10, %10, %10, %10) step (%9, %9, %9, %9) {
 %11 = llvm.mul %arg2, %10  : i64
            %12 = llvm.add %11, %arg3  : i64
            %13 = llvm.load %arg0 {alignment = 8 : i64} : !llvm.ptr -> vector<16xf64>
            %14 = llvm.load %arg1 {alignment = 8 : i64} : !llvm.ptr -> vector<16xf64>
            %15 = llvm.mul %arg3, %10  : i64
            %16 = llvm.add %15, %arg2  : i64
            %17 = llvm.getelementptr %arg1[%16] : (!llvm.ptr, i64) -> !llvm.ptr, f64
 %18 = llvm.load %17 {alignment = 8 : i64} : !llvm.ptr -> vector<16xf64>
            %19 = llvm.fmul %13, %14  : vector<16xf64>
 %20 = llvm.fdiv %14, %18  : vector<16xf64>
            %21 = llvm.fadd %19, %20  : vector<16xf64>
            %22 = llvm.getelementptr %arg1[%12] : (!llvm.ptr, i64) -> !llvm.ptr, f64
            llvm.store %21, %22 {alignment = 8 : i64} : vector<16xf64>, !llvm.ptr
 omp.terminator
          }
          omp.terminator
        }
 omp.terminator
      }
      omp.terminator
    }
 omp.barrier
    llvm.return
  }
  llvm.func @_mlir_ciface__QQmain_omp_outline_1() attributes {llvm.emit_c_interface} {
    llvm.call @_QQmain_omp_outline_1() : () -> ()
    llvm.return
 }
}
```
Using C program to call the function and build with clang args `-fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx1103 ` it's normal when running on the CPU host, but on AMDGPU it raise errors even if replace `llvm_omp_target_alloc_device` to `llvm.alloc` and with a smaller size:
```
Libomptarget error: Host ptr 0x0000560ed81500a1 does not have a matching target pointer.
Libomptarget error: Consult https://openmp.llvm.org/design/Runtimes.html for debugging options.
Libomptarget error: Source location information not present. Compile with -g or -gline-tables-only.
Libomptarget fatal error 1: failure of target construct while offloading is mandatory
```
The C code has similar function works normally on the AMDGPU:
```
int main() {
 double* a = (double*)llvm_omp_target_alloc_device(4096*4096*sizeof(double) , omp_get_default_device());
    double* b = (double*)llvm_omp_target_alloc_device(4096*4096*sizeof(double) , omp_get_default_device());
    #pragma omp target teams map(tofrom: a, b)
    #pragma omp parallel for
        for(int i=0; i<4096; i++){
 for(int j=0; j<4096; j++){
                for (int k=0; k<4096; k++){
                a[i*4096+j] = i * j;
                b[j*4096+k] = j / i;
                }
            }
        }
}
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to