[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-06 Thread Joseph Huber via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG8477a0d769a0: [OpenMP] Allow compiling multiple target 
architectures with OpenMP (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D124721?vs=426521&id=427734#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/amdgpu-openmp-toolchain-new.c
  clang/test/Driver/openmp-offload-gpu-new.c

Index: clang/test/Driver/openmp-offload-gpu-new.c
===
--- clang/test/Driver/openmp-offload-gpu-new.c
+++ clang/test/Driver/openmp-offload-gpu-new.c
@@ -10,6 +10,10 @@
 // RUN:  -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
 // RUN:  --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:  --offload-arch=sm_52 \
+// RUN:  --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
+// RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
@@ -40,6 +44,27 @@
 // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.*]]"
 // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
 
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU
+
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]"
+// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[NVIDIA_CUBIN]]", "[[AMD_BC]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
 
Index: clang/test/Driver/amdgpu-openmp-toolchain-new.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain-new.c
+++ clang/test/Driver/amdgpu-openmp-toolchain-new.c
@@ -3,6 +3,9 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
 // RUN:  -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:

[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp:313
+  if (BoundArch.empty())
+checkSystemForAMDGPU(Args, *this, Arch);
   DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);

tra wrote:
> I'd change `checkSystemForAMDGPU` to return the Arch or empty string.
> 
> I'd also simplify the code to something like this:
> ```
> std::string Arch = DAL->getLastArgValue(options::OPT_march_EQ).str();
> if (Arch.empty()) {
>   Arch = !BoundArch.empty() ? BoundArch :  checkSystemForAMDGPU(Args, *this);
>   DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
> }
> ```
Having `checkSystemForAMDGPU` cause errors when trying to test this was a pain. 
It'll take a bit more plumbing to address that so I think I'll leave this as-is 
for this patch and address it in a follow-up.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-05 Thread Artem Belevich via Phabricator via cfe-commits
tra accepted this revision.
tra added a comment.
This revision is now accepted and ready to land.

LGTM in general.




Comment at: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp:313
+  if (BoundArch.empty())
+checkSystemForAMDGPU(Args, *this, Arch);
   DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);

I'd change `checkSystemForAMDGPU` to return the Arch or empty string.

I'd also simplify the code to something like this:
```
std::string Arch = DAL->getLastArgValue(options::OPT_march_EQ).str();
if (Arch.empty()) {
  Arch = !BoundArch.empty() ? BoundArch :  checkSystemForAMDGPU(Args, *this);
  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
}
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-05 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

ping


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/test/Driver/openmp-offload-gpu-new.c:56
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings 
-fopenmp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa 
-Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \

tra wrote:
> You may want to add a test that when no `--offload-arch=` is specified, 
> driver makes a reasonable default choice for both nvptx and amdgpu.
OpenMP uses a different default from CUDA / HIP. If not specified, it will use 
the `CLANG_OPENMP_NVPTX_DEFAULT_ARCH` definition (Should be set automatically 
when CMake configures Clang) or the `amdgpu-arch` tool and pass it in via the 
`-march=` option. This is indicated by passing an empty StringRef to the bound 
architecture while CUDA / HIP use some default. Elsewhere, if we see the 
BoundArchitecture is empty we just get the value of the `-march` option 
instead. This should be covered by some other tests and this doesn't change the 
default behaviour.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.






Comment at: clang/test/Driver/openmp-offload-gpu-new.c:56
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings 
-fopenmp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa 
-Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \

You may want to add a test that when no `--offload-arch=` is specified, driver 
makes a reasonable default choice for both nvptx and amdgpu.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 426521.
jhuber6 added a comment.

Changing slightly, I'm using the `getArgsForToolchain` to only get the 
`--offload-arch` options for that toolchain. This lets us quality it with 
options like `-Xopenmp-target=` so we can now specify architectures 
per-toolchain without it causing an error. For example, the following should 
work:

  clang input.c -fopenmp -fopenmp-targets=nvptx64,amdgcn 
-Xopenmp-targets=amdgcn --offload-arch=gfx803 -Xopenmp-targets=nvptx64 
--offload-arch=sm_70 -c


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/amdgpu-openmp-toolchain-new.c
  clang/test/Driver/openmp-offload-gpu-new.c

Index: clang/test/Driver/openmp-offload-gpu-new.c
===
--- clang/test/Driver/openmp-offload-gpu-new.c
+++ clang/test/Driver/openmp-offload-gpu-new.c
@@ -10,6 +10,10 @@
 // RUN:  -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
 // RUN:  --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:  --offload-arch=sm_52 \
+// RUN:  --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
+// RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
@@ -40,6 +44,27 @@
 // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.*]]"
 // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
 
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU
+
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]"
+// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[NVIDIA_CUBIN]]", "[[AMD_BC]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
 
Index: clang/test/Driver/amdgpu-openmp-toolchain-new.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain-new.c
+++ clang/test/Driver/amdgpu-openmp-toolchain-new.c
@@ -3,6 +3,9 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 

[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

saiislam wrote:
> jhuber6 wrote:
> > saiislam wrote:
> > > saiislam wrote:
> > > > jhuber6 wrote:
> > > > > saiislam wrote:
> > > > > > Wouldn't it be better if the user is not required to specify the 
> > > > > > triple in this shorthand version? We can infer the triple from the 
> > > > > > GPUArch. We have this support in our downstream branch.
> > > > > > 
> > > > > > ```
> > > > > > clang  --target=x86_64-unknown-linux-gnu -fopenmp 
> > > > > > --offload-arch=gfx906 helloworld.c -o helloworld
> > > > > > ```
> > > > > We could, HIP and CUDA both use some kind of 
> > > > > `getAMDOffloadTargetTriple`. I guess in this case we would consider 
> > > > > OpenMP offloading active if the user specified `-fopenmp` and 
> > > > > `--offload-arch`? I could do this in a separate patch.
> > > > Yes, exactly. OpenMP offloading should be active when `-fopenmp` and 
> > > > `--offload-arch` both are present.
> > > > 
> > > > Thank you!
> > > Following code might be useful for your patch (it assumes that 
> > > OffloadArch is associated with each device tool chain so that multiple 
> > > archs of same triple can be compiled together):
> > > 
> > > 
> > >   # [[ 
> > > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L735
> > >  | GetTargetInfoFromOffloadArch() ]]
> > >   # [[ 
> > > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L779
> > >  | Driver::GetTargetInfoFromMarch() ]]
> > >   # [[ 
> > > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L819
> > >  | Driver::GetTargetInfoFromOffloadArchOpts() ]]
> > >   # [[ 
> > > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L851
> > >  | modified definition of Driver::CreateOffloadingDeviceToolChains() ]]
> > > 
> > I'll look into it, I was thinking of a good way to specify architectures 
> > per triple. So we could theoretically have `--offload-arch=sm_70` and 
> > `--offload_arch=gfx908` work in unison and it might just be easy to group 
> > the triples from the architecture.
> Along with this, we would also like to support --offload-arch=gfx906 and 
> --offload-arch=gfx908 in the same command.
This patch already supports that, we'll compile for all the architectures and 
they'll all end up linked in the linker wrapper. What's missing is the changes 
to select an appropriate image in the `libomptarget` runtime.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

jhuber6 wrote:
> saiislam wrote:
> > saiislam wrote:
> > > jhuber6 wrote:
> > > > saiislam wrote:
> > > > > Wouldn't it be better if the user is not required to specify the 
> > > > > triple in this shorthand version? We can infer the triple from the 
> > > > > GPUArch. We have this support in our downstream branch.
> > > > > 
> > > > > ```
> > > > > clang  --target=x86_64-unknown-linux-gnu -fopenmp 
> > > > > --offload-arch=gfx906 helloworld.c -o helloworld
> > > > > ```
> > > > We could, HIP and CUDA both use some kind of 
> > > > `getAMDOffloadTargetTriple`. I guess in this case we would consider 
> > > > OpenMP offloading active if the user specified `-fopenmp` and 
> > > > `--offload-arch`? I could do this in a separate patch.
> > > Yes, exactly. OpenMP offloading should be active when `-fopenmp` and 
> > > `--offload-arch` both are present.
> > > 
> > > Thank you!
> > Following code might be useful for your patch (it assumes that OffloadArch 
> > is associated with each device tool chain so that multiple archs of same 
> > triple can be compiled together):
> > 
> > 
> >   # [[ 
> > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L735
> >  | GetTargetInfoFromOffloadArch() ]]
> >   # [[ 
> > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L779
> >  | Driver::GetTargetInfoFromMarch() ]]
> >   # [[ 
> > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L819
> >  | Driver::GetTargetInfoFromOffloadArchOpts() ]]
> >   # [[ 
> > https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L851
> >  | modified definition of Driver::CreateOffloadingDeviceToolChains() ]]
> > 
> I'll look into it, I was thinking of a good way to specify architectures per 
> triple. So we could theoretically have `--offload-arch=sm_70` and 
> `--offload_arch=gfx908` work in unison and it might just be easy to group the 
> triples from the architecture.
Along with this, we would also like to support --offload-arch=gfx906 and 
--offload-arch=gfx908 in the same command.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

saiislam wrote:
> saiislam wrote:
> > jhuber6 wrote:
> > > saiislam wrote:
> > > > Wouldn't it be better if the user is not required to specify the triple 
> > > > in this shorthand version? We can infer the triple from the GPUArch. We 
> > > > have this support in our downstream branch.
> > > > 
> > > > ```
> > > > clang  --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx906 
> > > > helloworld.c -o helloworld
> > > > ```
> > > We could, HIP and CUDA both use some kind of `getAMDOffloadTargetTriple`. 
> > > I guess in this case we would consider OpenMP offloading active if the 
> > > user specified `-fopenmp` and `--offload-arch`? I could do this in a 
> > > separate patch.
> > Yes, exactly. OpenMP offloading should be active when `-fopenmp` and 
> > `--offload-arch` both are present.
> > 
> > Thank you!
> Following code might be useful for your patch (it assumes that OffloadArch is 
> associated with each device tool chain so that multiple archs of same triple 
> can be compiled together):
> 
> 
>   # [[ 
> https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L735
>  | GetTargetInfoFromOffloadArch() ]]
>   # [[ 
> https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L779
>  | Driver::GetTargetInfoFromMarch() ]]
>   # [[ 
> https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L819
>  | Driver::GetTargetInfoFromOffloadArchOpts() ]]
>   # [[ 
> https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L851
>  | modified definition of Driver::CreateOffloadingDeviceToolChains() ]]
> 
I'll look into it, I was thinking of a good way to specify architectures per 
triple. So we could theoretically have `--offload-arch=sm_70` and 
`--offload_arch=gfx908` work in unison and it might just be easy to group the 
triples from the architecture.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

saiislam wrote:
> jhuber6 wrote:
> > saiislam wrote:
> > > Wouldn't it be better if the user is not required to specify the triple 
> > > in this shorthand version? We can infer the triple from the GPUArch. We 
> > > have this support in our downstream branch.
> > > 
> > > ```
> > > clang  --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx906 
> > > helloworld.c -o helloworld
> > > ```
> > We could, HIP and CUDA both use some kind of `getAMDOffloadTargetTriple`. I 
> > guess in this case we would consider OpenMP offloading active if the user 
> > specified `-fopenmp` and `--offload-arch`? I could do this in a separate 
> > patch.
> Yes, exactly. OpenMP offloading should be active when `-fopenmp` and 
> `--offload-arch` both are present.
> 
> Thank you!
Following code might be useful for your patch (it assumes that OffloadArch is 
associated with each device tool chain so that multiple archs of same triple 
can be compiled together):


  # [[ 
https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L735
 | GetTargetInfoFromOffloadArch() ]]
  # [[ 
https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L779
 | Driver::GetTargetInfoFromMarch() ]]
  # [[ 
https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L819
 | Driver::GetTargetInfoFromOffloadArchOpts() ]]
  # [[ 
https://github.com/RadeonOpenCompute/llvm-project/blob/359002f885ea860609f0c841d69f4970ccbb37af/clang/lib/Driver/Driver.cpp#L851
 | modified definition of Driver::CreateOffloadingDeviceToolChains() ]]



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

jhuber6 wrote:
> saiislam wrote:
> > Wouldn't it be better if the user is not required to specify the triple in 
> > this shorthand version? We can infer the triple from the GPUArch. We have 
> > this support in our downstream branch.
> > 
> > ```
> > clang  --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx906 
> > helloworld.c -o helloworld
> > ```
> We could, HIP and CUDA both use some kind of `getAMDOffloadTargetTriple`. I 
> guess in this case we would consider OpenMP offloading active if the user 
> specified `-fopenmp` and `--offload-arch`? I could do this in a separate 
> patch.
Yes, exactly. OpenMP offloading should be active when `-fopenmp` and 
`--offload-arch` both are present.

Thank you!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-02 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

saiislam wrote:
> Wouldn't it be better if the user is not required to specify the triple in 
> this shorthand version? We can infer the triple from the GPUArch. We have 
> this support in our downstream branch.
> 
> ```
> clang  --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx906 
> helloworld.c -o helloworld
> ```
We could, HIP and CUDA both use some kind of `getAMDOffloadTargetTriple`. I 
guess in this case we would consider OpenMP offloading active if the user 
specified `-fopenmp` and `--offload-arch`? I could do this in a separate patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-05-01 Thread Saiyedul Islam via Phabricator via cfe-commits
saiislam added inline comments.



Comment at: clang/test/Driver/amdgpu-openmp-toolchain-new.c:6
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 
--libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \

Wouldn't it be better if the user is not required to specify the triple in this 
shorthand version? We can infer the triple from the GPUArch. We have this 
support in our downstream branch.

```
clang  --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx906 
helloworld.c -o helloworld
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124721/new/

https://reviews.llvm.org/D124721

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124721: [OpenMP] Allow compiling multiple target architectures with OpenMP

2022-04-30 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, JonChesterfield, yaxunl, saiislam, 
tianshilei1992, tra.
Herald added subscribers: kerbowa, guansong, jvesely.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, MaskRay.
Herald added a project: clang.

This patch adds support for OpenMP to use the `--offload-arch` and
`--no-offload-arch` options. Traditionally, OpenMP has only supported
compiling for a single architecture via the `-Xopenmp-target` option.
Now we can pass in a bound architecture and use that if given, otherwise
we default to the value of the `-march` option as before.

Note that this only applies the basic support, the OpenMP target runtime
does not yet know how to choose between multiple architectures.
Additionally other parts of the offloading toolchain (e.g. LTO) require
the `-march` option, these should be worked out later.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D124721

Files:
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/amdgpu-openmp-toolchain-new.c
  clang/test/Driver/openmp-offload-gpu-new.c

Index: clang/test/Driver/openmp-offload-gpu-new.c
===
--- clang/test/Driver/openmp-offload-gpu-new.c
+++ clang/test/Driver/openmp-offload-gpu-new.c
@@ -10,6 +10,10 @@
 // RUN:  -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
 // RUN:  --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:  --offload-arch=sm_52 \
+// RUN:  --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
+// RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
@@ -40,6 +44,15 @@
 // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.*]]"
 // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
 
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
 // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
 
Index: clang/test/Driver/amdgpu-openmp-toolchain-new.c
===
--- clang/test/Driver/amdgpu-openmp-toolchain-new.c
+++ clang/test/Driver/amdgpu-openmp-toolchain-new.c
@@ -3,6 +3,9 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
 // RUN:  -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
 // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:  --offload-arch=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
+// RUN:   | FileCheck %s
 
 // verify the tools invocations
 // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
@@ -34,6 +37,7 @@
 // CHECK-NOGPULIB-NOT: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx803.bc"{{.*}}
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amd