[clang] [clang-tools-extra] [libcxx] [clang] Enable sized deallocation by default in C++14 onwards (PR #83774)

2024-03-11 Thread Wang Pengcheng via cfe-commits


@@ -7105,10 +7105,15 @@ void Clang::ConstructJob(Compilation &C, const 
JobAction &JA,
   Args.addOptInFlag(CmdArgs, options::OPT_frelaxed_template_template_args,
 options::OPT_fno_relaxed_template_template_args);
 
-  // -fsized-deallocation is off by default, as it is an ABI-breaking change 
for
-  // most platforms.
-  Args.addOptInFlag(CmdArgs, options::OPT_fsized_deallocation,
-options::OPT_fno_sized_deallocation);
+  // -fsized-deallocation is on by default in C++14 onwards and otherwise off
+  // by default.
+  if (Arg *A = Args.getLastArg(options::OPT_fsized_deallocation,

wangpc-pp wrote:

Sorry, I may not understand what you mean. Can I leave it unchanged, as this code
is just like the handling of `-faligned-allocation`? We can change it later.

https://github.com/llvm/llvm-project/pull/83774
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Use timeTraceAsyncProfilerBegin for Source span (PR #83961)

2024-03-11 Thread Takuto Ikuta via cfe-commits


@@ -102,23 +104,24 @@ struct llvm::TimeTraceProfiler {
 llvm::get_thread_name(ThreadName);
   }
 
-  void begin(std::string Name, llvm::function_ref Detail) {
-Stack.emplace_back(ClockType::now(), TimePointType(), std::move(Name),
-   Detail());
+  TimeTraceProfilerEntry *begin(std::string Name,
+llvm::function_ref Detail,
+bool AsyncEvent = false) {
+Stack.emplace_back(std::make_unique(
+ClockType::now(), TimePointType(), std::move(Name), Detail(),
+AsyncEvent));
+return Stack.back().get();
   }
 
   void end() {
+TimeTraceProfilerEntry *E = Stack.back().get();

atetubou wrote:

But this function calls `end(TimeTraceProfilerEntry &E)` and that covers the 
assertion?

https://github.com/llvm/llvm-project/pull/83961
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Use timeTraceAsyncProfilerBegin for Source span (PR #83961)

2024-03-11 Thread Takuto Ikuta via cfe-commits

atetubou wrote:

> IIUC, the approach you choose here is to let `SemaPPCallbacks` control the 
> "entered file stack" and allow it to remove element (which is file) from 
> middle of the internal stack in `TimeTraceProfiler`, but this creates async 
> event which is not designed for this purpose.
> 
> Can we let `SemaPPCallbacks` track the last push file into the stack and when 
> exit file, pop all the elements from the stack until we popped the last 
> pushed file?

As I wrote in
https://github.com/llvm/llvm-project/issues/56554#issuecomment-1975812398,
file-level spans and syntax-tree-level spans should be handled asynchronously,
so such an implementation would produce incorrect traces in other edge cases.

https://github.com/llvm/llvm-project/pull/83961
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Use timeTraceAsyncProfilerBegin for Source span (PR #83961)

2024-03-11 Thread Takuto Ikuta via cfe-commits

https://github.com/atetubou updated 
https://github.com/llvm/llvm-project/pull/83961

>From 90ebde07f7fa426a37dd4bdc362e1a809aaf0844 Mon Sep 17 00:00:00 2001
From: Takuto Ikuta 
Date: Mon, 4 Mar 2024 19:12:31 +0900
Subject: [PATCH 1/3] Expose TimeTraceProfiler for Async Events

---
 llvm/include/llvm/Support/TimeProfiler.h|  34 +--
 llvm/lib/Support/TimeProfiler.cpp   | 101 ++--
 llvm/unittests/Support/TimeProfilerTest.cpp |  11 +++
 3 files changed, 107 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/Support/TimeProfiler.h 
b/llvm/include/llvm/Support/TimeProfiler.h
index 454a65f70231f4..31f7df10916db9 100644
--- a/llvm/include/llvm/Support/TimeProfiler.h
+++ b/llvm/include/llvm/Support/TimeProfiler.h
@@ -86,6 +86,8 @@ class raw_pwrite_stream;
 struct TimeTraceProfiler;
 TimeTraceProfiler *getTimeTraceProfilerInstance();
 
+struct TimeTraceProfilerEntry;
+
 /// Initialize the time trace profiler.
 /// This sets up the global \p TimeTraceProfilerInstance
 /// variable to be the profiler instance.
@@ -120,19 +122,30 @@ Error timeTraceProfilerWrite(StringRef PreferredFileName,
 /// Profiler copies the string data, so the pointers can be given into
 /// temporaries. Time sections can be hierarchical; every Begin must have a
 /// matching End pair but they can nest.
-void timeTraceProfilerBegin(StringRef Name, StringRef Detail);
-void timeTraceProfilerBegin(StringRef Name,
-llvm::function_ref Detail);
+TimeTraceProfilerEntry *timeTraceProfilerBegin(StringRef Name,
+   StringRef Detail);
+TimeTraceProfilerEntry *
+timeTraceProfilerBegin(StringRef Name,
+   llvm::function_ref Detail);
+
+/// Manually begin a time section, with the given \p Name and \p Detail.
+/// This starts Async Events having \p Name as a category which is shown
+/// separately from other traces. See
+/// 
https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#heading=h.jh64i9l3vwa1
+/// for more details.
+TimeTraceProfilerEntry *timeTraceAsyncProfilerBegin(StringRef Name,
+StringRef Detail);
 
 /// Manually end the last time section.
 void timeTraceProfilerEnd();
+void timeTraceProfilerEnd(TimeTraceProfilerEntry *E);
 
 /// The TimeTraceScope is a helper class to call the begin and end functions
 /// of the time trace profiler.  When the object is constructed, it begins
 /// the section; and when it is destroyed, it stops it. If the time profiler
 /// is not initialized, the overhead is a single branch.
-struct TimeTraceScope {
-
+class TimeTraceScope {
+public:
   TimeTraceScope() = delete;
   TimeTraceScope(const TimeTraceScope &) = delete;
   TimeTraceScope &operator=(const TimeTraceScope &) = delete;
@@ -141,20 +154,23 @@ struct TimeTraceScope {
 
   TimeTraceScope(StringRef Name) {
 if (getTimeTraceProfilerInstance() != nullptr)
-  timeTraceProfilerBegin(Name, StringRef(""));
+  Entry = timeTraceProfilerBegin(Name, StringRef(""));
   }
   TimeTraceScope(StringRef Name, StringRef Detail) {
 if (getTimeTraceProfilerInstance() != nullptr)
-  timeTraceProfilerBegin(Name, Detail);
+  Entry = timeTraceProfilerBegin(Name, Detail);
   }
   TimeTraceScope(StringRef Name, llvm::function_ref Detail) {
 if (getTimeTraceProfilerInstance() != nullptr)
-  timeTraceProfilerBegin(Name, Detail);
+  Entry = timeTraceProfilerBegin(Name, Detail);
   }
   ~TimeTraceScope() {
 if (getTimeTraceProfilerInstance() != nullptr)
-  timeTraceProfilerEnd();
+  timeTraceProfilerEnd(Entry);
   }
+
+private:
+  TimeTraceProfilerEntry *Entry = nullptr;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Support/TimeProfiler.cpp 
b/llvm/lib/Support/TimeProfiler.cpp
index 4d625b3eb5b170..3114f8e7ded598 100644
--- a/llvm/lib/Support/TimeProfiler.cpp
+++ b/llvm/lib/Support/TimeProfiler.cpp
@@ -11,6 +11,7 @@
 
//===--===//
 
 #include "llvm/Support/TimeProfiler.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/JSON.h"
@@ -20,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -64,17 +66,19 @@ using CountAndDurationType = std::pair;
 using NameAndCountAndDurationType =
 std::pair;
 
+} // anonymous namespace
+
 /// Represents an open or completed time section entry to be captured.
-struct TimeTraceProfilerEntry {
+struct llvm::TimeTraceProfilerEntry {
   const TimePointType Start;
   TimePointType End;
   const std::string Name;
   const std::string Detail;
-
+  const bool AsyncEvent = false;
   TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
- std::string &&Dt)
+ std::string &&Dt, bool Ae)
   : Start(std::move(S)), End(std::move(E)),

[clang] [llvm] [RISCV] Add generic CPUs for profiles (PR #84877)

2024-03-11 Thread via cfe-commits

llvmbot wrote:



@llvm/pr-subscribers-backend-risc-v
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-driver

Author: Wang Pengcheng (wangpc-pp)


Changes

As discussed in 
https://github.com/llvm/llvm-project/pull/76357#discussion_r1518452608,
we may need to add generic CPUs for profiles.

I don't know if we need S-mode profile CPUs.


---

Patch is 33.35 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/84877.diff


3 Files Affected:

- (modified) clang/test/Driver/riscv-cpus.c (+319) 
- (modified) clang/test/Misc/target-invalid-cpu-note.c (+4-4) 
- (modified) llvm/lib/Target/RISCV/RISCVProcessors.td (+216-8) 


``diff
diff --git a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c
index ff2bd6f7c8ba34..a285f0f9c41f54 100644
--- a/clang/test/Driver/riscv-cpus.c
+++ b/clang/test/Driver/riscv-cpus.c
@@ -302,3 +302,322 @@
 
 // RUN: not %clang --target=riscv32 -### -c %s 2>&1 -mcpu=generic-rv32 
-march=rv64i | FileCheck -check-prefix=MISMATCH-ARCH %s
 // MISMATCH-ARCH: cpu 'generic-rv32' does not support rv64
+
+// Check profile CPUs
+
+// RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=generic-rvi20u32 | 
FileCheck -check-prefix=MCPU-GENERIC-RVI20U32 %s
+// MCPU-GENERIC-RVI20U32: "-target-cpu" "generic-rvi20u32"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-a"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-c"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-d"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-f"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-m"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-abi" "ilp32"
+
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=generic-rvi20u64 | 
FileCheck -check-prefix=MCPU-GENERIC-RVI20U64 %s
+// MCPU-GENERIC-RVI20U64: "-target-cpu" "generic-rvi20u64"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-a"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-c"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-d"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-f"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-m"
+// MCPU-GENERIC-RVI20U64-SAME: "-target-abi" "lp64"
+
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=generic-rva20u64 | 
FileCheck -check-prefix=MCPU-GENERIC-RVA20U64 %s
+// MCPU-GENERIC-RVA20U64: "-target-cpu" "generic-rva20u64"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+m"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+a"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+f"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+d"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+c"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+ziccamoa"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+ziccif"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+zicclsm"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+ziccrse"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+zicntr"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+zicsr"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+za128rs"
+// MCPU-GENERIC-RVA20U64-SAME: "-target-abi" "lp64d"
+
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=generic-rva20s64 | 
FileCheck -check-prefix=MCPU-GENERIC-RVA20S64 %s
+// MCPU-GENERIC-RVA20S64: "-target-cpu" "generic-rva20s64"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+m"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+a"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+f"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+d"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+c"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+ziccamoa"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+ziccif"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+zicclsm"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+ziccrse"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+zicntr"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+zicsr"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+zifencei"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+za128rs"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+ssccptr"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+sstvala"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+sstvecd"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+svade"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+svbare"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-abi" "lp64d"
+
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=generic-rva22u64 | 
FileCheck -check-prefix=MCPU-GENERIC-RVA22U64 %s
+// MCPU-GENERIC-RVA22U64: "-target-cpu" "generic-rva22u64"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+m"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+a"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+f"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+d"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+c"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+zic64b"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+zicbom"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+zicbop"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+zicbo

[clang] [llvm] [RISCV] Add generic CPUs for profiles (PR #84877)

2024-03-11 Thread Wang Pengcheng via cfe-commits

https://github.com/wangpc-pp created 
https://github.com/llvm/llvm-project/pull/84877

As discussed in 
https://github.com/llvm/llvm-project/pull/76357#discussion_r1518452608,
we may need to add generic CPUs for profiles.

I don't know if we need S-mode profile CPUs.


>From ec68548a470d6d9032a900a725e95b92691657b2 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng 
Date: Tue, 12 Mar 2024 14:28:09 +0800
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 clang/test/Driver/riscv-cpus.c| 319 ++
 clang/test/Misc/target-invalid-cpu-note.c |   8 +-
 llvm/lib/Target/RISCV/RISCVProcessors.td  | 224 ++-
 3 files changed, 539 insertions(+), 12 deletions(-)

diff --git a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c
index ff2bd6f7c8ba34..a285f0f9c41f54 100644
--- a/clang/test/Driver/riscv-cpus.c
+++ b/clang/test/Driver/riscv-cpus.c
@@ -302,3 +302,322 @@
 
 // RUN: not %clang --target=riscv32 -### -c %s 2>&1 -mcpu=generic-rv32 
-march=rv64i | FileCheck -check-prefix=MISMATCH-ARCH %s
 // MISMATCH-ARCH: cpu 'generic-rv32' does not support rv64
+
+// Check profile CPUs
+
+// RUN: %clang -target riscv32 -### -c %s 2>&1 -mcpu=generic-rvi20u32 | 
FileCheck -check-prefix=MCPU-GENERIC-RVI20U32 %s
+// MCPU-GENERIC-RVI20U32: "-target-cpu" "generic-rvi20u32"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-a"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-c"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-d"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-f"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-feature" "-m"
+// MCPU-GENERIC-RVI20U32-SAME: "-target-abi" "ilp32"
+
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=generic-rvi20u64 | 
FileCheck -check-prefix=MCPU-GENERIC-RVI20U64 %s
+// MCPU-GENERIC-RVI20U64: "-target-cpu" "generic-rvi20u64"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-a"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-c"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-d"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-f"
+// MCPU-GENERIC-RVI20U64: "-target-feature" "-m"
+// MCPU-GENERIC-RVI20U64-SAME: "-target-abi" "lp64"
+
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=generic-rva20u64 | 
FileCheck -check-prefix=MCPU-GENERIC-RVA20U64 %s
+// MCPU-GENERIC-RVA20U64: "-target-cpu" "generic-rva20u64"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+m"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+a"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+f"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+d"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+c"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+ziccamoa"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+ziccif"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+zicclsm"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+ziccrse"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+zicntr"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+zicsr"
+// MCPU-GENERIC-RVA20U64: "-target-feature" "+za128rs"
+// MCPU-GENERIC-RVA20U64-SAME: "-target-abi" "lp64d"
+
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=generic-rva20s64 | 
FileCheck -check-prefix=MCPU-GENERIC-RVA20S64 %s
+// MCPU-GENERIC-RVA20S64: "-target-cpu" "generic-rva20s64"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+m"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+a"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+f"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+d"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+c"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+ziccamoa"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+ziccif"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+zicclsm"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+ziccrse"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+zicntr"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+zicsr"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+zifencei"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+za128rs"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+ssccptr"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+sstvala"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+sstvecd"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+svade"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-feature" "+svbare"
+// MCPU-GENERIC-RVA20S64-SAME: "-target-abi" "lp64d"
+
+// RUN: %clang -target riscv64 -### -c %s 2>&1 -mcpu=generic-rva22u64 | 
FileCheck -check-prefix=MCPU-GENERIC-RVA22U64 %s
+// MCPU-GENERIC-RVA22U64: "-target-cpu" "generic-rva22u64"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+m"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+a"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+f"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+d"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+c"
+// MCPU-GENERIC-RVA22U64-SAME: "-target-feature" "+zic64

[clang] [llvm] [AMDGPU] Adding the amdgpu-num-work-groups function attribute (PR #79035)

2024-03-11 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Adding the amdgpu-num-work-groups function attribute (PR #79035)

2024-03-11 Thread Matt Arsenault via cfe-commits


@@ -137,6 +137,12 @@ Removed Compiler Flags
 
 Attribute Changes in Clang
 --
+- Introduced a new function attribute 
``__attribute__((amdgpu_max_num_work_groups(x, y, z)))`` or

arsenm wrote:

I think ".max_num_workgroups", "amdgpu-max-num-workgroups", and
"amdgpu_max_num_work_groups" are the most consistent with the existing uses.

https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [WebAssembly] Implement an alternative translation for -wasm-enable-sjlj (PR #84137)

2024-03-11 Thread Sam Clegg via cfe-commits

sbc100 wrote:

> @aheejin @sbc100 let me confirm the plan on this PR. i can remove the option 
> `-mllvm -experimental-wasm-enable-alt-sjlj` by making it unconditionally 
> true, and update tests, right?

That is my understanding yes.

https://github.com/llvm/llvm-project/pull/84137
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang][BPF] Allow sign extension for int type call parameters (PR #84874)

2024-03-11 Thread via cfe-commits

yonghong-song wrote:

cc @anakryiko @jemarch

https://github.com/llvm/llvm-project/pull/84874
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang][BPF] Allow sign extension for int type call parameters (PR #84874)

2024-03-11 Thread via cfe-commits

llvmbot wrote:



@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-codegen

Author: None (yonghong-song)


Changes

Pu Lehui (pulehui@huaweicloud.com) privately reported an issue: in
no_alu32 mode, clang may generate code which produces an incorrect result on
the riscv architecture.

The affected bpf prog is kfunc_call_test4 at bpf selftests 
prog/kfunc_call_test.c. The following is the source code:
```
  long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
  int kfunc_call_test4(struct __sk_buff *skb)
  {
...
tmp = bpf_kfunc_call_test4(-3, -30, -200, -1000);
...
  }
```
For the above code, at no_alu32 mode (-mcpu=v2), the asm code looks like
  0: r1 = -3
  1: r2 = -30
  2: r3 = 0xffffff38 ll // opcode: 18 03 00 00 38 ff ff ff 00 00 00 00 00 00 00 
00
  4: r4 = -1000
  5: call bpf_kfunc_call_test4

In bpf_kfunc_call_test4(), arguments with 'char', 'short' and 'long' are 
generated correctly, for r1, r2 and r4 in the above. But the argument r3 is 
generated with ld_imm64.

Further investigation found that
  - char/short type arguments are sign-extended, so a MOV insn is naturally used
  - int type arguments are zero-extended, so a ld_imm64 insn is used
  - long type arguments can do sign extension with a 32-bit value, so a MOV insn is used

In the riscv case, the 'r3' value (0xffffff38 ll) is passed to riscv kernel
code which does not do 32-bit sign extension, causing an incorrect result.

Why does intel/arm64 not have this issue? x86_64/arm64 support subregisters, so
for 'int' types subregisters are used directly, hence there is no issue.

Considering BPF is a 64-bit arch, I think it makes sense that at the IR level an
'int' type argument should have the same sign-extension as a 'char' or 'short'
type argument. This will solve the above riscv issue.

This patch will cause two codegen changes:
  - for an 'int' constant argument, a MOV insn will be used instead of a 
ld_imm64.
  - for an 'int' register argument, for cpu=v1/v2, left/right shift will 
happen. for cpu=v3/v4, there is no change from previous behavior as 
subregisters will be used.

Tested with bpf selftests with all of no-alu32, cpu=v3 and cpu=v4, and all 
passed.

---
Full diff: https://github.com/llvm/llvm-project/pull/84874.diff


3 Files Affected:

- (modified) clang/lib/CodeGen/Targets/BPF.cpp (+13) 
- (modified) clang/test/CodeGen/bpf-abiinfo.c (+10) 
- (added) llvm/test/CodeGen/BPF/cc_args_int.ll (+34) 


``diff
diff --git a/clang/lib/CodeGen/Targets/BPF.cpp 
b/clang/lib/CodeGen/Targets/BPF.cpp
index 2849222f7a1869..01937574779618 100644
--- a/clang/lib/CodeGen/Targets/BPF.cpp
+++ b/clang/lib/CodeGen/Targets/BPF.cpp
@@ -22,6 +22,19 @@ class BPFABIInfo : public DefaultABIInfo {
 public:
   BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
 
+  bool isPromotableIntegerTypeForABI(QualType Ty) const {
+if (ABIInfo::isPromotableIntegerTypeForABI(Ty) == true)
+  return true;
+
+if (const auto *BT = Ty->getAs()) {
+  // For 'signed int' type, return true to allow sign-extension.
+  if (BT->getKind() == BuiltinType::Int)
+return true;
+}
+
+return false;
+  }
+
   ABIArgInfo classifyArgumentType(QualType Ty) const {
 Ty = useFirstFieldIfTransparentUnion(Ty);
 
diff --git a/clang/test/CodeGen/bpf-abiinfo.c b/clang/test/CodeGen/bpf-abiinfo.c
index 366e8003f45572..6d259d8e6d6c73 100644
--- a/clang/test/CodeGen/bpf-abiinfo.c
+++ b/clang/test/CodeGen/bpf-abiinfo.c
@@ -22,3 +22,13 @@ int foo_int(void) {
 if (bar_int() != 10) return 0; else return 1;
 }
 // CHECK: %call = call i32 @bar_int()
+
+void sprog1(short, int, int);
+void mprog1() {
+  sprog1(-3, 4, -5);
+// CHECK: call void @sprog1(i16 noundef signext -3, i32 noundef signext 4, i32 
noundef signext -5)
+}
+void mprog2(long a, long b) {
+  sprog1(a, b, b);
+// CHECK: call void @sprog1(i16 noundef signext %{{[0-9a-z]+}}, i32 noundef 
signext %{{[0-9a-z]+}}, i32 noundef signext %{{[0-9a-z]+}})
+}
diff --git a/llvm/test/CodeGen/BPF/cc_args_int.ll 
b/llvm/test/CodeGen/BPF/cc_args_int.ll
new file mode 100644
index 00..79a9d27b87d709
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/cc_args_int.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=bpfel -mcpu=v1 < %s | FileCheck --check-prefix=CHECK-V1 %s
+; RUN: llc -march=bpfel -mcpu=v2 < %s | FileCheck --check-prefix=CHECK-V2 %s
+; RUN: llc -march=bpfel -mcpu=v3 < %s | FileCheck --check-prefix=CHECK-V3 %s
+; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck --check-prefix=CHECK-V4 %s
+
+declare dso_local void @bar(i16 noundef signext, i32 noundef signext, i32 
noundef signext) local_unnamed_addr
+
+define void @test() {
+entry:
+  tail call void @bar(i16 noundef signext -3, i32 noundef signext 4, i32 
noundef signext -5)
+; CHECK-V1: r2 = 4
+; CHECK-V1: r3 = -5
+; CHECK-V2: r2 = 4
+; CHECK-V2: r3 = -5
+; CHECK-V3: w2 = 4
+; CHECK-V3: w3 = -5
+; CHECK-V4: w2 = 4
+; CHECK-V4: w3 = -5
+  ret void
+}
+
+define dso_local void @test2(i64 noundef %a, i64 n

[clang] [llvm] [Clang][BPF] Allow sign extension for int type call parameters (PR #84874)

2024-03-11 Thread via cfe-commits

https://github.com/yonghong-song created 
https://github.com/llvm/llvm-project/pull/84874

Pu Lehui (pule...@huaweicloud.com) privately reported an issue: in
no_alu32 mode, clang may generate code which produces an incorrect result on
the riscv architecture.

The affected bpf prog is kfunc_call_test4 at bpf selftests 
prog/kfunc_call_test.c. The following is the source code:
```
  long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
  int kfunc_call_test4(struct __sk_buff *skb)
  {
...
tmp = bpf_kfunc_call_test4(-3, -30, -200, -1000);
...
  }
```
For the above code, at no_alu32 mode (-mcpu=v2), the asm code looks like
  0: r1 = -3
  1: r2 = -30
  2: r3 = 0xffffff38 ll // opcode: 18 03 00 00 38 ff ff ff 00 00 00 00 00 00 00 
00
  4: r4 = -1000
  5: call bpf_kfunc_call_test4

In bpf_kfunc_call_test4(), arguments with 'char', 'short' and 'long' are 
generated correctly, for r1, r2 and r4 in the above. But the argument r3 is 
generated with ld_imm64.

Further investigation found that
  - char/short type arguments are sign-extended, so a MOV insn is naturally used
  - int type arguments are zero-extended, so a ld_imm64 insn is used
  - long type arguments can do sign extension with a 32-bit value, so a MOV insn is used

In the riscv case, the 'r3' value (0xffffff38 ll) is passed to riscv kernel
code which does not do 32-bit sign extension, causing an incorrect result.

Why does intel/arm64 not have this issue? x86_64/arm64 support subregisters, so
for 'int' types subregisters are used directly, hence there is no issue.

Considering BPF is a 64-bit arch, I think it makes sense that at the IR level an
'int' type argument should have the same sign-extension as a 'char' or 'short'
type argument. This will solve the above riscv issue.

This patch will cause two codegen changes:
  - for an 'int' constant argument, a MOV insn will be used instead of a 
ld_imm64.
  - for an 'int' register argument, for cpu=v1/v2, left/right shift will 
happen. for cpu=v3/v4, there is no change from previous behavior as 
subregisters will be used.

Tested with bpf selftests with all of no-alu32, cpu=v3 and cpu=v4, and all 
passed.

>From c61e6ed152c505fc096265e77069bfda08be3436 Mon Sep 17 00:00:00 2001
From: Yonghong Song 
Date: Mon, 11 Mar 2024 22:27:37 -0700
Subject: [PATCH] [Clang][BPF] Allow sign extension for int type call
 parameters

Pu Lehui (pule...@huaweicloud.com) reported an issue in private
that at no_alu32 mode clang may generate code which produced
incorrect result with riscv architecture.

The affected bpf prog is kfunc_call_test4 at bpf selftests
prog/kfunc_call_test.c. The following is the source code:

  long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
  int kfunc_call_test4(struct __sk_buff *skb)
  {
...
tmp = bpf_kfunc_call_test4(-3, -30, -200, -1000);
...
  }

For the above code, at no_alu32 mode (-mcpu=v2), the asm code looks like
  0: r1 = -3
  1: r2 = -30
  2: r3 = 0xffffff38 ll // opcode: 18 03 00 00 38 ff ff ff 00 00 00 00 00 00 00 
00
  4: r4 = -1000
  5: call bpf_kfunc_call_test4

In bpf_kfunc_call_test4(), arguments with 'char', 'short' and 'long' are
generated correctly, for r1, r2 and r4 in the above. But the argument
r3 is generated with ld_imm64.

Further investigation is found that
  - char/short type arguments are signed extended so naturally using MOV insn
  - int type argument are zero extended so using ld_imm64 insn
  - long type argument can do sign extension with 32-bit value so using MOV insn

In riscv case, the 'r3' value (0xffffff38 ll) will be passed to riscv kernel
code which does not do 32-bit sign extension and caused incorrect result.

Why intel/arm64 does not have this issue? x86_64/arm64 supports subregisters
so for 'int' types, subregisters are directly used hence there is no issue.

Considering BPF is a 64-bit arch, so I think it makes sense at IR level
'int' type argument should have the same sign-extension as 'char' or 'short'
type. This will solve the above riscv issue.

This patch will cause two codegen changes:
  - for an 'int' constant argument, a MOV insn will be used instead of a 
ld_imm64.
  - for an 'int' register argument, for cpu=v1/v2, left/right shift will happen.
for cpu=v3/v4, there is no change from previous behavior as subregisters 
will
be used.

Tested with bpf selftests with all of no-alu32, cpu=v3 and cpu=v4, and
all passed.

Signed-off-by: Yonghong Song 
---
 clang/lib/CodeGen/Targets/BPF.cpp| 13 +++
 clang/test/CodeGen/bpf-abiinfo.c | 10 
 llvm/test/CodeGen/BPF/cc_args_int.ll | 34 
 3 files changed, 57 insertions(+)
 create mode 100644 llvm/test/CodeGen/BPF/cc_args_int.ll

diff --git a/clang/lib/CodeGen/Targets/BPF.cpp 
b/clang/lib/CodeGen/Targets/BPF.cpp
index 2849222f7a1869..01937574779618 100644
--- a/clang/lib/CodeGen/Targets/BPF.cpp
+++ b/clang/lib/CodeGen/Targets/BPF.cpp
@@ -22,6 +22,19 @@ class BPFABIInfo : public DefaultABIInfo {
 pu

[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-11 Thread Matt Arsenault via cfe-commits
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= 
Message-ID:
In-Reply-To: 



@@ -1130,8 +1130,96 @@ struct BitTest {
 
   static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
 };
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullopt otherwise.
+std::optional getConvergenceToken(llvm::BasicBlock *BB) 
{
+  for (auto &I : *BB) {

arsenm wrote:

It feels wrong that you would need to do a scan of a block to find this 

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-11 Thread Matt Arsenault via cfe-commits
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= 
Message-ID:
In-Reply-To: 



@@ -1130,8 +1130,96 @@ struct BitTest {
 
   static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
 };
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullopt otherwise.
+std::optional getConvergenceToken(llvm::BasicBlock *BB) 
{
+  for (auto &I : *BB) {
+auto *II = dyn_cast(&I);
+if (II && isConvergenceControlIntrinsic(II->getIntrinsicID()))
+  return II;
+  }
+  return std::nullopt;
+}
+
 } // namespace
 
+llvm::CallBase *
+CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input,
+   llvm::Value *ParentToken) {
+  llvm::Value *bundleArgs[] = {ParentToken};
+  llvm::OperandBundleDef OB("convergencectrl", bundleArgs);
+  auto Output = llvm::CallBase::addOperandBundle(
+  Input, llvm::LLVMContext::OB_convergencectrl, OB, Input);
+  Input->replaceAllUsesWith(Output);
+  Input->eraseFromParent();
+  return Output;
+}
+
+llvm::IntrinsicInst *
+CodeGenFunction::EmitConvergenceLoop(llvm::BasicBlock *BB,
+ llvm::Value *ParentToken) {
+  CGBuilderTy::InsertPoint IP = Builder.saveIP();
+  Builder.SetInsertPoint(&BB->front());
+  auto CB = Builder.CreateIntrinsic(
+  llvm::Intrinsic::experimental_convergence_loop, {}, {});
+  Builder.restoreIP(IP);
+
+  auto I = AddConvergenceControlAttr(CB, ParentToken);
+  // Controlled convergence is incompatible with uncontrolled convergence.
+  // Removing any old attributes.
+  I->setNotConvergent();

arsenm wrote:

Don't understand this. I thought it was a verifier error to be not-convergent 
and have a convergence token.

This won't actually do anything for an intrinsic. Because convergent is 
broken/backwards, you can't locally override it at a callsite and you can't 
remove the convergent from the underlying declaration 

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-11 Thread Matt Arsenault via cfe-commits
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= 
Message-ID:
In-Reply-To: 



@@ -5686,6 +5686,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
   if (!CI->getType()->isVoidTy())
 CI->setName("call");
 
+  if (getTarget().getTriple().isSPIRVLogical() &&
+  CI->getCalledFunction()->isConvergent())

arsenm wrote:

Can you drop the getCalledFunction part? 

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-11 Thread Matt Arsenault via cfe-commits
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= 
Message-ID:
In-Reply-To: 



@@ -1130,8 +1130,96 @@ struct BitTest {
 
   static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
 };
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullopt otherwise.
+std::optional getConvergenceToken(llvm::BasicBlock *BB) 
{

arsenm wrote:

Don't see the point of wrapping this in optional instead of just returning 
null. It's no safer and now requires more syntax 

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang][HLSL][SPRI-V] Add convergence intrinsics (PR #80680)

2024-03-11 Thread Matt Arsenault via cfe-commits
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= 
Message-ID:
In-Reply-To: 



@@ -1130,8 +1130,96 @@ struct BitTest {
 
   static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
 };
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullopt otherwise.
+std::optional getConvergenceToken(llvm::BasicBlock *BB) 
{
+  for (auto &I : *BB) {
+auto *II = dyn_cast(&I);
+if (II && isConvergenceControlIntrinsic(II->getIntrinsicID()))
+  return II;
+  }
+  return std::nullopt;
+}
+
 } // namespace
 
+llvm::CallBase *
+CodeGenFunction::AddConvergenceControlAttr(llvm::CallBase *Input,
+   llvm::Value *ParentToken) {
+  llvm::Value *bundleArgs[] = {ParentToken};

arsenm wrote:

Can you avoid bundleArgs and just construct the array inline? 

https://github.com/llvm/llvm-project/pull/80680
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow access to a public template alias declaration that refers to friend's private nested type (PR #83847)

2024-03-11 Thread Qizhi Hu via cfe-commits


@@ -259,6 +259,10 @@ Bug Fixes in This Version
   operator.
   Fixes (#GH83267).
 
+- Allow access to a public template alias declaration that refers to friend's

jcsxky wrote:

Updated and moved it to the right place.

https://github.com/llvm/llvm-project/pull/83847
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow access to a public template alias declaration that refers to friend's private nested type (PR #83847)

2024-03-11 Thread Qizhi Hu via cfe-commits


@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s
+// RUN: %clang_cc1 -std=c++14 -verify %s
+// RUN: %clang_cc1 -std=c++17 -verify %s
+// RUN: %clang_cc1 -std=c++20 -verify %s

jcsxky wrote:

Fixed.

https://github.com/llvm/llvm-project/pull/83847
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow access to a public template alias declaration that refers to friend's private nested type (PR #83847)

2024-03-11 Thread Qizhi Hu via cfe-commits


@@ -409,7 +413,7 @@ RISC-V Support
 CUDA/HIP Language Changes
 ^
 
-- PTX is no longer included by default when compiling for CUDA. Using 
+- PTX is no longer included by default when compiling for CUDA. Using

jcsxky wrote:

Fixed.

https://github.com/llvm/llvm-project/pull/83847
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow access to a public template alias declaration that refers to friend's private nested type (PR #83847)

2024-03-11 Thread Qizhi Hu via cfe-commits


@@ -4342,10 +4342,17 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
 InstantiatingTemplate Inst(*this, TemplateLoc, Template);
 if (Inst.isInvalid())
   return QualType();
+if (!AliasTemplate->getDeclContext()->isFileContext()) {
+  ContextRAII SavedContext(*this, AliasTemplate->getDeclContext());

jcsxky wrote:

Looks more clear. Thanks for your guidance!

https://github.com/llvm/llvm-project/pull/83847
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Don't do casting of atomic FP loads/stores in FE. (PR #83446)

2024-03-11 Thread Matt Arsenault via cfe-commits


@@ -1953,13 +1966,22 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, 
LValue dest,
 }
 
 // Okay, we're doing this natively.
-llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
+llvm::Value *ValToStore =
+atomics.convertRValueToInt(rvalue, /*CastFP=*/false);
 
 // Do the atomic store.
-Address addr = atomics.castToAtomicIntPointer(atomics.getAtomicAddress());
-intValue = Builder.CreateIntCast(
-intValue, addr.getElementType(), /*isSigned=*/false);
-llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
+Address Addr = atomics.getAtomicAddress();
+bool ShouldCastToInt = true;
+if (llvm::Value *Value = atomics.getScalarRValValueOrNull(rvalue))
+  if (isa(Value->getType()) ||
+  Value->getType()->isIEEELikeFPTy())
+ShouldCastToInt = false;
+if (ShouldCastToInt) {

arsenm wrote:

Factoring this into a shouldCastToInt helper function would be better, but it 
matters little if you're just going to remove this soon anyway 

https://github.com/llvm/llvm-project/pull/83446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Don't do casting of atomic FP loads/stores in FE. (PR #83446)

2024-03-11 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/83446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Don't do casting of atomic FP loads/stores in FE. (PR #83446)

2024-03-11 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/83446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] d02d8df - [clang][Interp] Implement _Complex negation

2024-03-11 Thread Timm Bäder via cfe-commits

Author: Timm Bäder
Date: 2024-03-12T05:34:57+01:00
New Revision: d02d8df0cd797342f7042440e07133e99ad5e0a2

URL: 
https://github.com/llvm/llvm-project/commit/d02d8df0cd797342f7042440e07133e99ad5e0a2
DIFF: 
https://github.com/llvm/llvm-project/commit/d02d8df0cd797342f7042440e07133e99ad5e0a2.diff

LOG: [clang][Interp] Implement _Complex negation

Factor complex unary operations into their own function.

Added: 


Modified: 
clang/lib/AST/Interp/ByteCodeExprGen.cpp
clang/lib/AST/Interp/ByteCodeExprGen.h

Removed: 




diff  --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp 
b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index a384e191464fea..0dd645990d1d58 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -2959,6 +2959,8 @@ bool ByteCodeExprGen::VisitCXXThisExpr(const 
CXXThisExpr *E) {
 template 
 bool ByteCodeExprGen::VisitUnaryOperator(const UnaryOperator *E) {
   const Expr *SubExpr = E->getSubExpr();
+  if (SubExpr->getType()->isAnyComplexType())
+return this->VisitComplexUnaryOperator(E);
   std::optional T = classify(SubExpr->getType());
 
   switch (E->getOpcode()) {
@@ -3109,16 +3111,81 @@ bool ByteCodeExprGen::VisitUnaryOperator(const 
UnaryOperator *E) {
   return false;
 return DiscardResult ? this->emitPop(*T, E) : this->emitComp(*T, E);
   case UO_Real: // __real x
-if (T)
-  return this->delegate(SubExpr);
-return this->emitComplexReal(SubExpr);
+assert(T);
+return this->delegate(SubExpr);
   case UO_Imag: { // __imag x
-if (T) {
-  if (!this->discard(SubExpr))
+assert(T);
+if (!this->discard(SubExpr))
+  return false;
+return this->visitZeroInitializer(*T, SubExpr->getType(), SubExpr);
+  }
+  case UO_Extension:
+return this->delegate(SubExpr);
+  case UO_Coawait:
+assert(false && "Unhandled opcode");
+  }
+
+  return false;
+}
+
+template 
+bool ByteCodeExprGen::VisitComplexUnaryOperator(
+const UnaryOperator *E) {
+  const Expr *SubExpr = E->getSubExpr();
+  assert(SubExpr->getType()->isAnyComplexType());
+
+  if (DiscardResult)
+return this->discard(SubExpr);
+
+  std::optional ResT = classify(E);
+
+  // Prepare storage for result.
+  if (!ResT && !Initializing) {
+std::optional LocalIndex =
+allocateLocal(SubExpr, /*IsExtended=*/false);
+if (!LocalIndex)
+  return false;
+if (!this->emitGetPtrLocal(*LocalIndex, E))
+  return false;
+  }
+
+  // The offset of the temporary, if we created one.
+  unsigned SubExprOffset = ~0u;
+  auto createTemp = [=, &SubExprOffset]() -> bool {
+SubExprOffset = this->allocateLocalPrimitive(SubExpr, PT_Ptr, true, false);
+if (!this->visit(SubExpr))
+  return false;
+return this->emitSetLocal(PT_Ptr, SubExprOffset, E);
+  };
+
+  PrimType ElemT = classifyComplexElementType(SubExpr->getType());
+  auto getElem = [=](unsigned Offset, unsigned Index) -> bool {
+if (!this->emitGetLocal(PT_Ptr, Offset, E))
+  return false;
+return this->emitArrayElemPop(ElemT, Index, E);
+  };
+
+  switch (E->getOpcode()) {
+  case UO_Minus:
+if (!createTemp())
+  return false;
+for (unsigned I = 0; I != 2; ++I) {
+  if (!getElem(SubExprOffset, I))
+return false;
+  if (!this->emitNeg(ElemT, E))
+return false;
+  if (!this->emitInitElem(ElemT, I, E))
 return false;
-  return this->visitZeroInitializer(*T, SubExpr->getType(), SubExpr);
 }
+break;
+
+  case UO_AddrOf:
+return this->delegate(SubExpr);
 
+  case UO_Real:
+return this->emitComplexReal(SubExpr);
+
+  case UO_Imag:
 if (!this->visit(SubExpr))
   return false;
 
@@ -3131,14 +3198,12 @@ bool ByteCodeExprGen::VisitUnaryOperator(const 
UnaryOperator *E) {
 // Since our _Complex implementation does not map to a primitive type,
 // we sometimes have to do the lvalue-to-rvalue conversion here manually.
 return this->emitArrayElemPop(classifyPrim(E->getType()), 1, E);
-  }
-  case UO_Extension:
-return this->delegate(SubExpr);
-  case UO_Coawait:
-assert(false && "Unhandled opcode");
+
+  default:
+return this->emitInvalid(E);
   }
 
-  return false;
+  return true;
 }
 
 template 

diff  --git a/clang/lib/AST/Interp/ByteCodeExprGen.h 
b/clang/lib/AST/Interp/ByteCodeExprGen.h
index 5977bb5e6ff25d..5ad2e74d7c2693 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -75,6 +75,7 @@ class ByteCodeExprGen : public 
ConstStmtVisitor, bool>,
   bool VisitGNUNullExpr(const GNUNullExpr *E);
   bool VisitCXXThisExpr(const CXXThisExpr *E);
   bool VisitUnaryOperator(const UnaryOperator *E);
+  bool VisitComplexUnaryOperator(const UnaryOperator *E);
   bool VisitDeclRefExpr(const DeclRefExpr *E);
   bool VisitImplicitValueInitExpr(const ImplicitValueInitExpr *E);
   bool VisitSubstNonTypeTemplateParmExpr(const SubstNonTypeTempla

[clang] [llvm] [WebAssembly] Implement an alternative translation for -wasm-enable-sjlj (PR #84137)

2024-03-11 Thread YAMAMOTO Takashi via cfe-commits

yamt wrote:

@aheejin @sbc100 
let me confirm the plan on this PR.
i can remove the option `-mllvm -experimental-wasm-enable-alt-sjlj` by making 
it unconditionally true, and update tests, right?


https://github.com/llvm/llvm-project/pull/84137
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][modules] giving the __stddef_ headers their own modules can cause redeclaration errors with -fbuiltin-headers-in-system-modules (PR #84127)

2024-03-11 Thread Ian Anderson via cfe-commits

https://github.com/ian-twilightcoder edited 
https://github.com/llvm/llvm-project/pull/84127
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][modules] giving the __stddef_ headers their own modules can cause redeclaration errors with -fbuiltin-headers-in-system-modules (PR #84127)

2024-03-11 Thread Ian Anderson via cfe-commits


@@ -7,6 +7,11 @@
  *===---===
  */
 
-#ifndef offsetof
+/*
+ * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
+ * and needs to behave as if it was textual.
+ */
+#if !defined(offsetof) ||  
\
+(__has_feature(modules) && !__building_module(_Builtin_stddef))

ian-twilightcoder wrote:

This header shouldn't be seen at all, and `_Builtin_stddef.offset` should 
instead be imported/made visible

https://github.com/llvm/llvm-project/pull/84127
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][modules] giving the __stddef_ headers their own modules can cause redeclaration errors with -fbuiltin-headers-in-system-modules (PR #84127)

2024-03-11 Thread Ian Anderson via cfe-commits


@@ -2498,9 +2498,12 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind 
LeadingToken,
   }
 
   bool NeedsFramework = false;
-  // Don't add the top level headers to the builtin modules if the builtin 
headers
-  // belong to the system modules.
-  if (!Map.LangOpts.BuiltinHeadersInSystemModules || 
ActiveModule->isSubModule() || !isBuiltInModuleName(ActiveModule->Name))
+  // Don't add headers to the builtin modules if the builtin headers belong to
+  // the system modules, with the exception of __stddef_max_align_t.h which
+  // always had its own module.
+  if (!Map.LangOpts.BuiltinHeadersInSystemModules ||
+  !isBuiltInModuleName(ActiveModule->getTopLevelModuleName()) ||
+  ActiveModule->fullModuleNameIs({"_Builtin_stddef", "max_align_t"}))

ian-twilightcoder wrote:

I don't really know the right answer; __stddef_wint_t.h is a weird one. 
Strictly speaking it wasn't modular, so anyone could import it previously. But 
then it's not really supposed to be part of stddef.h, and you have to 
specifically opt into seeing it, i.e. if you just include stddef.h you never 
got __stddef_wint_t.h. So maybe it's ok that it's unconditionally in its own 
module. Or maybe it needs to be added to `isBuiltInModuleName`.

https://github.com/llvm/llvm-project/pull/84127
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang-format][NFC] Eliminate the IsCpp parameter in all functions (PR #84599)

2024-03-11 Thread Owen Pan via cfe-commits

owenca wrote:

> I mean multiple threads in the same process with different languages. maybe 
> unlikely, but not impossible.

But libFormat/clang-format are _not_ multithreaded, right?

https://github.com/llvm/llvm-project/pull/84599
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HLSL] Add -HV option translation to clang-dxc.exe (PR #83938)

2024-03-11 Thread Joshua Batista via cfe-commits

https://github.com/bob80905 updated 
https://github.com/llvm/llvm-project/pull/83938

>From 7453ffdea39c624221c9696394bbd47be7eec662 Mon Sep 17 00:00:00 2001
From: Joshua Batista 
Date: Mon, 4 Mar 2024 13:42:02 -0800
Subject: [PATCH 1/8] first try

---
 clang/include/clang/Driver/Options.td | 4 
 clang/lib/Driver/ToolChains/HLSL.cpp  | 8 
 2 files changed, 12 insertions(+)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index bef38738fde82e..6fe3dea1655b24 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8545,6 +8545,10 @@ def dxc_entrypoint : Option<["--", "/", "-"], "E", 
KIND_JOINED_OR_SEPARATE>,
  Group,
  Visibility<[DXCOption]>,
  HelpText<"Entry point name">;
+def dxc_HlslVersion : Option<["--", "/", "-"], "HV", KIND_JOINED_OR_SEPARATE>,
+ Group,
+ Visibility<[DXCOption]>,
+ HelpText<"HLSL Version">;
 def dxc_validator_path_EQ : Joined<["--"], "dxv-path=">, Group,
   HelpText<"DXIL validator installation path">;
 def dxc_disable_validation : DXCFlag<"Vd">,
diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp 
b/clang/lib/Driver/ToolChains/HLSL.cpp
index c6ad862b229420..0ffc15155e4db3 100644
--- a/clang/lib/Driver/ToolChains/HLSL.cpp
+++ b/clang/lib/Driver/ToolChains/HLSL.cpp
@@ -226,6 +226,14 @@ HLSLToolChain::TranslateArgs(const DerivedArgList &Args, 
StringRef BoundArch,
   A->claim();
   continue;
 }
+if (A->getOption().getID() == options::OPT_HLSL_Version) {
+  // Translate -HV into -std for llvm
+  DAL->AddSeparateArg(nullptr,
+  Opts.getOption(options::OPT_stdlibxx_isystem),
+  A->getValue());
+  A->claim();
+  continue;
+}
 DAL->append(A);
   }
 

>From 69953d737b842f2144ebe0519d810c57b5c031b9 Mon Sep 17 00:00:00 2001
From: Joshua Batista 
Date: Mon, 4 Mar 2024 17:11:37 -0800
Subject: [PATCH 2/8] add HV option, and translation test

---
 clang/include/clang/Driver/Options.td |  2 +-
 clang/lib/Driver/ToolChains/HLSL.cpp  | 22 ++
 clang/test/Options/HV.hlsl| 13 +
 3 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/Options/HV.hlsl

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 6fe3dea1655b24..c4caf232887b56 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8545,7 +8545,7 @@ def dxc_entrypoint : Option<["--", "/", "-"], "E", 
KIND_JOINED_OR_SEPARATE>,
  Group,
  Visibility<[DXCOption]>,
  HelpText<"Entry point name">;
-def dxc_HlslVersion : Option<["--", "/", "-"], "HV", KIND_JOINED_OR_SEPARATE>,
+def dxc_hlsl_version : Option<["--", "/", "-"], "HV", KIND_JOINED_OR_SEPARATE>,
  Group,
  Visibility<[DXCOption]>,
  HelpText<"HLSL Version">;
diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp 
b/clang/lib/Driver/ToolChains/HLSL.cpp
index 0ffc15155e4db3..fe258919dedf3e 100644
--- a/clang/lib/Driver/ToolChains/HLSL.cpp
+++ b/clang/lib/Driver/ToolChains/HLSL.cpp
@@ -226,11 +226,25 @@ HLSLToolChain::TranslateArgs(const DerivedArgList &Args, 
StringRef BoundArch,
   A->claim();
   continue;
 }
-if (A->getOption().getID() == options::OPT_HLSL_Version) {
+if (A->getOption().getID() == options::OPT_dxc_hlsl_version) {
   // Translate -HV into -std for llvm
-  DAL->AddSeparateArg(nullptr,
-  Opts.getOption(options::OPT_stdlibxx_isystem),
-  A->getValue());
+  // depending on the value given, assign std to:
+  // c++14,c++17,c++20,c++latest,c11,c17
+  const char *value = A->getValue();
+  if (strcmp(value, "2016") == 0) {
+DAL->AddSeparateArg(nullptr, Opts.getOption(options::OPT_std_EQ),
+"hlsl2016");
+  } else if (strcmp(value, "2017") == 0) {
+DAL->AddSeparateArg(nullptr, Opts.getOption(options::OPT_std_EQ),
+"hlsl2017");
+  } else if (strcmp(value, "2018") == 0) {
+DAL->AddSeparateArg(nullptr, Opts.getOption(options::OPT_std_EQ),
+"hlsl2018");
+  } else if (strcmp(value, "2021") == 0) {
+DAL->AddSeparateArg(nullptr, Opts.getOption(options::OPT_std_EQ),
+"hlsl2021");
+  }
+
   A->claim();
   continue;
 }
diff --git a/clang/test/Options/HV.hlsl b/clang/test/Options/HV.hlsl
new file mode 100644
index 00..59158ff2f001ed
--- /dev/null
+++ b/clang/test/Options/HV.hlsl
@@ -0,0 +1,13 @@
+// RUN: %clang_dxc -T lib_6_4 -HV 2016 %s 2>&1 -###   | FileCheck 
-check-prefix=2016 %s
+// RUN: %clang_dxc -T lib_6_4 -HV 2017 %s 2>&1 -###   | FileCheck 
-check

[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-11 Thread via cfe-commits

github-actions[bot] wrote:



Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be
notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this 
page.

If this is not working for you, it is probably because you do not have write
permissions for the repository. In which case you can instead tag reviewers by
name in a comment by using `@` followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review
by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate
is once a week. Please remember that you are asking for valuable time from 
other developers.

If you have further questions, they may be answered by the [LLVM GitHub User 
Guide](https://llvm.org/docs/GitHub.html).

You can also ask questions in a comment on this PR, on the [LLVM 
Discord](https://discord.com/invite/xS7Z362) or on the 
[forums](https://discourse.llvm.org/).

https://github.com/llvm/llvm-project/pull/84864
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-11 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel created 
https://github.com/llvm/llvm-project/pull/84864

This change:
- Updates the existing Clang User's Manual section on SPGO so that it describes 
how to use llvm-profgen to perform SPGO on Windows. This is new functionality 
implemented in #83972.
- Fixes a minor typo in the existing llvm-profgen invocation example.
- Adds an LLVM release note on this new functionality in llvm-profgen.

>From 4dc108d0d290ee5fd6a73c029c051fdb2215d00a Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 11 Mar 2024 22:35:59 -0400
Subject: [PATCH] Update documentation and release notes for llvm-profgen COFF
 support

This change:
- Updates the existing Clang User's Manual section on SPGO so that it
  describes how to use llvm-profgen to perform SPGO on Windows. This is
  new functionality implemented in #83972.
- Fixes a minor typo in the existing llvm-profgen invocation example.
- Adds an LLVM release note on this new functionality in llvm-profgen.
---
 clang/docs/UsersManual.rst | 47 +++---
 llvm/docs/ReleaseNotes.rst |  5 
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 7391e4cf3a9aeb..9cf313c3727125 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2410,20 +2410,35 @@ usual build cycle when using sample profilers for 
optimization:
 
 1. Build the code with source line table information. You can use all the
usual build flags that you always build your application with. The only
-   requirement is that you add ``-gline-tables-only`` or ``-g`` to the
-   command line. This is important for the profiler to be able to map
-   instructions back to source line locations.
+   requirement is that DWARF debug info including source line information is
+   generated. This DWARF information is important for the profiler to be able
+   to map instructions back to source line locations.
+
+   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
 
.. code-block:: console
 
  $ clang++ -O2 -gline-tables-only code.cc -o code
 
+   It is also possible to include DWARF in Windows binaries:
+
+   .. code-block:: console
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
-   into the format that the LLVM optimizer understands. Currently, there
-   exists a conversion tool for the Linux Perf profiler
-   (https://perf.wiki.kernel.org/), so these examples assume that you
-   are using Linux Perf to profile your code.
+   into the format that the LLVM optimizer understands.
+
+   Two such profilers are the Linux Perf profiler
+   (https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
+   available as part of `Intel VTune
+   
`_.
+
+   The LLVM tool ``llvm-profgen`` can convert output of either Perf or SEP. An
+   external tool, AutoFDO, also supports Linux Perf output.
+
+   When using Perf:
 
.. code-block:: console
 
@@ -2434,6 +2449,15 @@ usual build cycle when using sample profilers for 
optimization:
it provides better call information, which improves the accuracy of
the profile data.
 
+   When using SEP:
+
+   .. code-block:: console
+
+ $ sep -start -ec BR_INST_RETIRED.NEAR_TAKEN:precise=yes:pdir -lbr 
no_filter:usr -perf-script ip,brstack -app ./code
+
+   This produces a ``perf.data.script`` output which can be used with
+   ``llvm-profgen``'s ``--perfscript`` input option.
+
 3. Convert the collected profile data to LLVM's sample profile format.
This is currently supported via the AutoFDO converter ``create_llvm_prof``.
It is available at https://github.com/google/autofdo. Once built and
@@ -2454,7 +2478,14 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ llvm-profgen --binary=./code --output=code.prof--perfdata=perf.data
+ $ llvm-profgen --binary=./code --output=code.prof --perfdata=perf.data
+
+   When using SEP the output is in the textual format corresponding to
+   ``llvm-profgen --perfscript``. For example:
+
+   .. code-block:: console
+
+ $ llvm-profgen --binary=./code --output=code.prof 
--perfscript=perf.data.script
 
 
 4. Build the code again using the collected profile. This step feeds
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index b34a5f31c5eb0a..c2bbc647bc18e6 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -157,6 +157,11 @@ Changes to the LLVM tools
   ``--set-symbols-visibility`` options for ELF input to change the
   visibility of symbols.
 
+* llvm-profgen now supports COFF+DWARF binaries. This enables Sample-based PGO
+  on Windows using Intel VTune's SEP

[clang] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use thread pool in P1689 per file mode (PR #84285)

2024-03-11 Thread Chuanqi Xu via cfe-commits


@@ -893,102 +889,118 @@ int clang_scan_deps_main(int argc, char **argv, const 
llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
 FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
-  if (Verbose) {
-llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
- << " files using " << Pool.getMaxConcurrency() << " 
workers\n";
-  }
-
-  llvm::Timer T;
-  T.startTimer();
-
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-Pool.async([&, I]() {
-  llvm::DenseSet AlreadySeenModules;
-  while (auto MaybeInputIndex = GetNextInputIndex()) {
-size_t LocalIndex = *MaybeInputIndex;
-const tooling::CompileCommand *Input = &Inputs[LocalIndex];
-std::string Filename = std::move(Input->Filename);
-std::string CWD = std::move(Input->Directory);
-
-std::optional MaybeModuleName;
-if (!ModuleName.empty())
-  MaybeModuleName = ModuleName;
-
-std::string OutputDir(ModuleFilesDir);
-if (OutputDir.empty())
-  OutputDir = getModuleCachePath(Input->CommandLine);
-auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
-  return ::lookupModuleOutput(MID, MOK, OutputDir);
-};
-
-// Run the tool on it.
-if (Format == ScanningOutputFormat::Make) {
-  auto MaybeFile =
-  WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
-  if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
- Errs))
-HadErrors = true;
-} else if (Format == ScanningOutputFormat::P1689) {
-  // It is useful to generate the make-format dependency output during
-  // the scanning for P1689. Otherwise the users need to scan again for
-  // it. We will generate the make-format dependency output if we find
-  // `-MF` in the command lines.
-  std::string MakeformatOutputPath;
-  std::string MakeformatOutput;
-
-  auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
-  *Input, CWD, MakeformatOutput, MakeformatOutputPath);
-
-  if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
-HadErrors = true;
+  std::vector> WorkerTools;

ChuanqiXu9 wrote:

Nice catch. Done.

https://github.com/llvm/llvm-project/pull/84285
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use thread pool in P1689 per file mode (PR #84285)

2024-03-11 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 updated 
https://github.com/llvm/llvm-project/pull/84285

>From 48b3261e1d217b7ce78180314a222dca4d6aba18 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Thu, 7 Mar 2024 15:19:28 +0800
Subject: [PATCH 1/5] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use
 thread pool in P1689 per file mode

I noticed that clang-scan-deps may use all available concurrent threads
to scan the files. That makes sense in batch mode, but in P1689
per-file mode it simply wastes time.
---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 204 +-
 1 file changed, 108 insertions(+), 96 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp 
b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index d042fecc3dbe63..843816a8ed6515 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -744,6 +744,9 @@ getCompilationDataBase(int argc, char **argv, std::string 
&ErrorMessage) {
 return nullptr;
   }
 
+  // Only 1 thread is required in P1689 per-file mode.
+  NumThreads = 1;
+
   // There might be multiple jobs for a compilation. Extract the specified
   // output filename from the last job.
   auto LastCmd = C->getJobs().end();
@@ -867,13 +870,6 @@ int clang_scan_deps_main(int argc, char **argv, const 
llvm::ToolContext &) {
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
 
-  DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
-EagerLoadModules);
-  llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
-  std::vector> WorkerTools;
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
-WorkerTools.push_back(std::make_unique(Service));
-
   std::vector Inputs =
   AdjustingCompilations->getAllCompileCommands();
 
@@ -893,102 +889,118 @@ int clang_scan_deps_main(int argc, char **argv, const 
llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
 FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
-  if (Verbose) {
-llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
- << " files using " << Pool.getMaxConcurrency() << " 
workers\n";
-  }
-
-  llvm::Timer T;
-  T.startTimer();
-
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-Pool.async([&, I]() {
-  llvm::DenseSet AlreadySeenModules;
-  while (auto MaybeInputIndex = GetNextInputIndex()) {
-size_t LocalIndex = *MaybeInputIndex;
-const tooling::CompileCommand *Input = &Inputs[LocalIndex];
-std::string Filename = std::move(Input->Filename);
-std::string CWD = std::move(Input->Directory);
-
-std::optional MaybeModuleName;
-if (!ModuleName.empty())
-  MaybeModuleName = ModuleName;
-
-std::string OutputDir(ModuleFilesDir);
-if (OutputDir.empty())
-  OutputDir = getModuleCachePath(Input->CommandLine);
-auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
-  return ::lookupModuleOutput(MID, MOK, OutputDir);
-};
-
-// Run the tool on it.
-if (Format == ScanningOutputFormat::Make) {
-  auto MaybeFile =
-  WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
-  if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
- Errs))
-HadErrors = true;
-} else if (Format == ScanningOutputFormat::P1689) {
-  // It is useful to generate the make-format dependency output during
-  // the scanning for P1689. Otherwise the users need to scan again for
-  // it. We will generate the make-format dependency output if we find
-  // `-MF` in the command lines.
-  std::string MakeformatOutputPath;
-  std::string MakeformatOutput;
-
-  auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
-  *Input, CWD, MakeformatOutput, MakeformatOutputPath);
-
-  if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
-HadErrors = true;
+  std::vector> WorkerTools;
 
-  if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
-  !HadErrors) {
-static std::mutex Lock;
-// With compilation database, we may open different files
-// concurrently or we may write the same file concurrently. So we
-// use a map here to allow multiple compile commands to write to 
the
-// same file. Also we need a lock here to avoid data race.
-static llvm::StringMap OSs;
-std::unique_lock LockGuard(Lock);
-
-auto OSIter = OSs.find(MakeformatOutputPath);
-if (OSIter == OSs.end()) {
-  std::error_code EC;
-  OSIter = OSs.try_emplace(MakeformatOutputPath,
-   MakeformatOutputPath

[clang] [clang][modules] giving the __stddef_ headers their own modules can cause redeclaration errors with -fbuiltin-headers-in-system-modules (PR #84127)

2024-03-11 Thread Volodymyr Sapsai via cfe-commits


@@ -7,6 +7,11 @@
  *===---===
  */
 
-#ifndef offsetof
+/*
+ * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
+ * and needs to behave as if it was textual.
+ */
+#if !defined(offsetof) ||  
\
+(__has_feature(modules) && !__building_module(_Builtin_stddef))

vsapsai wrote:

What should happen when `offsetof` is defined and we are building a
non-`_Builtin_stddef` module?

https://github.com/llvm/llvm-project/pull/84127
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][modules] giving the __stddef_ headers their own modules can cause redeclaration errors with -fbuiltin-headers-in-system-modules (PR #84127)

2024-03-11 Thread Volodymyr Sapsai via cfe-commits


@@ -2498,9 +2498,12 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind 
LeadingToken,
   }
 
   bool NeedsFramework = false;
-  // Don't add the top level headers to the builtin modules if the builtin 
headers
-  // belong to the system modules.
-  if (!Map.LangOpts.BuiltinHeadersInSystemModules || 
ActiveModule->isSubModule() || !isBuiltInModuleName(ActiveModule->Name))
+  // Don't add headers to the builtin modules if the builtin headers belong to
+  // the system modules, with the exception of __stddef_max_align_t.h which
+  // always had its own module.
+  if (!Map.LangOpts.BuiltinHeadersInSystemModules ||
+  !isBuiltInModuleName(ActiveModule->getTopLevelModuleName()) ||
+  ActiveModule->fullModuleNameIs({"_Builtin_stddef", "max_align_t"}))

vsapsai wrote:

Should `_Builtin_stddef_wint_t` be a part of this check too? I don't know the 
right answer, just trying to understand.

https://github.com/llvm/llvm-project/pull/84127
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][modules] giving the __stddef_ headers their own modules can cause redeclaration errors with -fbuiltin-headers-in-system-modules (PR #84127)

2024-03-11 Thread Volodymyr Sapsai via cfe-commits

https://github.com/vsapsai commented:

Still kinda confused. Have a few questions trying to improve my understanding.

https://github.com/llvm/llvm-project/pull/84127
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][modules] giving the __stddef_ headers their own modules can cause redeclaration errors with -fbuiltin-headers-in-system-modules (PR #84127)

2024-03-11 Thread Volodymyr Sapsai via cfe-commits

https://github.com/vsapsai edited 
https://github.com/llvm/llvm-project/pull/84127
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use thread pool in P1689 per file mode (PR #84285)

2024-03-11 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 updated 
https://github.com/llvm/llvm-project/pull/84285

>From 48b3261e1d217b7ce78180314a222dca4d6aba18 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Thu, 7 Mar 2024 15:19:28 +0800
Subject: [PATCH 1/4] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use
 thread pool in P1689 per file mode

I noticed that clang-scan-deps may use all available threads
to scan the files. That makes sense in batch mode, but in P1689
per-file mode it simply wastes time.
---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 204 +-
 1 file changed, 108 insertions(+), 96 deletions(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp 
b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index d042fecc3dbe63..843816a8ed6515 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -744,6 +744,9 @@ getCompilationDataBase(int argc, char **argv, std::string 
&ErrorMessage) {
 return nullptr;
   }
 
+  // Only 1 threads is required if P1689 per file mode.
+  NumThreads = 1;
+
   // There might be multiple jobs for a compilation. Extract the specified
   // output filename from the last job.
   auto LastCmd = C->getJobs().end();
@@ -867,13 +870,6 @@ int clang_scan_deps_main(int argc, char **argv, const 
llvm::ToolContext &) {
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
 
-  DependencyScanningService Service(ScanMode, Format, OptimizeArgs,
-EagerLoadModules);
-  llvm::DefaultThreadPool Pool(llvm::hardware_concurrency(NumThreads));
-  std::vector> WorkerTools;
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I)
-WorkerTools.push_back(std::make_unique(Service));
-
   std::vector Inputs =
   AdjustingCompilations->getAllCompileCommands();
 
@@ -893,102 +889,118 @@ int clang_scan_deps_main(int argc, char **argv, const 
llvm::ToolContext &) {
   if (Format == ScanningOutputFormat::Full)
 FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
 
-  if (Verbose) {
-llvm::outs() << "Running clang-scan-deps on " << Inputs.size()
- << " files using " << Pool.getMaxConcurrency() << " 
workers\n";
-  }
-
-  llvm::Timer T;
-  T.startTimer();
-
-  for (unsigned I = 0; I < Pool.getMaxConcurrency(); ++I) {
-Pool.async([&, I]() {
-  llvm::DenseSet AlreadySeenModules;
-  while (auto MaybeInputIndex = GetNextInputIndex()) {
-size_t LocalIndex = *MaybeInputIndex;
-const tooling::CompileCommand *Input = &Inputs[LocalIndex];
-std::string Filename = std::move(Input->Filename);
-std::string CWD = std::move(Input->Directory);
-
-std::optional MaybeModuleName;
-if (!ModuleName.empty())
-  MaybeModuleName = ModuleName;
-
-std::string OutputDir(ModuleFilesDir);
-if (OutputDir.empty())
-  OutputDir = getModuleCachePath(Input->CommandLine);
-auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) {
-  return ::lookupModuleOutput(MID, MOK, OutputDir);
-};
-
-// Run the tool on it.
-if (Format == ScanningOutputFormat::Make) {
-  auto MaybeFile =
-  WorkerTools[I]->getDependencyFile(Input->CommandLine, CWD);
-  if (handleMakeDependencyToolResult(Filename, MaybeFile, DependencyOS,
- Errs))
-HadErrors = true;
-} else if (Format == ScanningOutputFormat::P1689) {
-  // It is useful to generate the make-format dependency output during
-  // the scanning for P1689. Otherwise the users need to scan again for
-  // it. We will generate the make-format dependency output if we find
-  // `-MF` in the command lines.
-  std::string MakeformatOutputPath;
-  std::string MakeformatOutput;
-
-  auto MaybeRule = WorkerTools[I]->getP1689ModuleDependencyFile(
-  *Input, CWD, MakeformatOutput, MakeformatOutputPath);
-
-  if (handleP1689DependencyToolResult(Filename, MaybeRule, PD, Errs))
-HadErrors = true;
+  std::vector> WorkerTools;
 
-  if (!MakeformatOutputPath.empty() && !MakeformatOutput.empty() &&
-  !HadErrors) {
-static std::mutex Lock;
-// With compilation database, we may open different files
-// concurrently or we may write the same file concurrently. So we
-// use a map here to allow multiple compile commands to write to 
the
-// same file. Also we need a lock here to avoid data race.
-static llvm::StringMap OSs;
-std::unique_lock LockGuard(Lock);
-
-auto OSIter = OSs.find(MakeformatOutputPath);
-if (OSIter == OSs.end()) {
-  std::error_code EC;
-  OSIter = OSs.try_emplace(MakeformatOutputPath,
-   MakeformatOutputPath

[clang] [NFC] [C++20] [Modules] [P1689] [Scanner] Don't use thread pool in P1689 per file mode (PR #84285)

2024-03-11 Thread Chuanqi Xu via cfe-commits


@@ -744,6 +744,9 @@ getCompilationDataBase(int argc, char **argv, std::string 
&ErrorMessage) {
 return nullptr;
   }
 
+  // Only 1 threads is required if P1689 per file mode.
+  NumThreads = 1;

ChuanqiXu9 wrote:

Oh, sorry. I don't know why I missed this somehow. This should be done now.

https://github.com/llvm/llvm-project/pull/84285
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [C++20] [Modules] Introduce a tool 'clang-named-modules-querier' and two plugins 'ClangGetUsedFilesFromModulesPlugin' and 'ClangGetDeclsInModulesPlugin' (PR #72956)

2024-03-11 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

> > ClangGetUsedFilesFromModulesPlugin
> 
> This has a hole where if a currently-unused file is not listed, but it is 
> changed in such a way that it now matters (e.g., it changes include order, 
> adds/removes includes, etc.), we need to recompile consumers.
> 
> > what happens if someone adds an overload, or other interesting name 
> > resolution to the module?
> 
> We would need to do (at least) one of:
> 
> * track considered-but-discarded decls (e.g., if something SFINAE'd away now 
> matters because of a new decl);
> * track "new" decls since the last compile (not sure how the state tracking 
> works here though) and recompile if any show up
> 
> > ```
> >  "col": 12,
> >  "kind": "Function",
> >  "line": 3,
> > ```
> 
> So we change the decl hash if a comment adds a line? That seems like 
> low-hanging fruit to me. Can we enumerate decls and use an index instead? 
> That depends on preprocessor state though, so may be hard to externally 
> verify…

The reason I chose to use the location is that the Discourse discussion showed
that the location is still meaningful in the debug information of generated
objects. As for an index, I feel it is hard to tell what the index means outside
the compiler. As you said, how can a consumer be sure that 2 decls with the same
index in 2 different invocations are the same one...

https://github.com/llvm/llvm-project/pull/72956
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Reland "[clang][modules] Print library module manifest path." (PR #82160)

2024-03-11 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

> @ChuanqiXu9 since you suggested this test approach

It looks like I failed to understand that VE is under X86 too...

> do you know whether this is the expected behavior of -sysroot ?

I am not sure. This is surprising to me too.



> The point is to build libraries and test clang at once. The `TC.getFilePaths` 
> is constructed from following items in `ToolChain::ToolChain`
> 
> 1. `BUILD/bin/../lib/x86_64-unknown-linux-gnu`
> 2. `%t/Inputs/usr/lib/x86_64-linux-gnu`
> 3. `%t/Inputs/usr/lib`
> 
> Yes. It contains 1st item at run time if you build libraries. As a result, 
> `GetStdModuleManifestPath` find 
> `BUILD/bin/../lib/x86_64-unknown-linux-gnu/libc++.so` first. However, 
> `modules.json` is at `%t/Inputs/usr/lib/x86_64-linux-gnu/modules.json`.
> 
> Hope this helps you.

It looks like this is not specific to VE but is reproducible whenever clang and
libc++ are built at once. So I'll try to look at it if I can reproduce it
locally.

https://github.com/llvm/llvm-project/pull/82160
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow access to a public template alias declaration that refers to friend's private nested type (PR #83847)

2024-03-11 Thread Qizhi Hu via cfe-commits

https://github.com/jcsxky updated 
https://github.com/llvm/llvm-project/pull/83847

>From 32bcc78c7d563bda920b3b6150dc1149e1ca1df1 Mon Sep 17 00:00:00 2001
From: huqizhi 
Date: Mon, 4 Mar 2024 21:51:07 +0800
Subject: [PATCH] [Clang][Sema] Allow access to a public template alias
 declaration that refers to friend's private nested type

---
 clang/docs/ReleaseNotes.rst |  2 ++
 clang/lib/Sema/SemaTemplate.cpp | 10 +++---
 clang/test/SemaTemplate/PR25708.cpp | 20 
 3 files changed, 29 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/SemaTemplate/PR25708.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 88e552d5c46113..a49504b71ad18d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -351,6 +351,8 @@ Bug Fixes to C++ Support
   when one of the function had more specialized templates.
   Fixes (`#82509 `_)
   and (`#74494 `_)
+- Allow access to a public template alias declaration that refers to friend's
+  private nested type. (#GH25708).
 
 Bug Fixes to AST Handling
 ^
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index d62095558d0ffb..d8c9a5c09944c4 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -4343,9 +4343,13 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
 if (Inst.isInvalid())
   return QualType();
 
-CanonType = SubstType(Pattern->getUnderlyingType(),
-  TemplateArgLists, AliasTemplate->getLocation(),
-  AliasTemplate->getDeclName());
+std::optional SavedContext;
+if (!AliasTemplate->getDeclContext()->isFileContext())
+  SavedContext.emplace(*this, AliasTemplate->getDeclContext());
+
+CanonType =
+SubstType(Pattern->getUnderlyingType(), TemplateArgLists,
+  AliasTemplate->getLocation(), AliasTemplate->getDeclName());
 if (CanonType.isNull()) {
   // If this was enable_if and we failed to find the nested type
   // within enable_if in a SFINAE context, dig out the specific
diff --git a/clang/test/SemaTemplate/PR25708.cpp 
b/clang/test/SemaTemplate/PR25708.cpp
new file mode 100644
index 00..6a214fc6b43bc1
--- /dev/null
+++ b/clang/test/SemaTemplate/PR25708.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s
+// expected-no-diagnostics
+
+struct FooAccessor
+{
+template 
+using Foo = typename T::Foo;
+};
+
+class Type
+{
+friend struct FooAccessor;
+
+using Foo = int;
+};
+
+int main()
+{
+FooAccessor::Foo t;
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow access to a public template alias declaration that refers to friend's private nested type (PR #83847)

2024-03-11 Thread Qizhi Hu via cfe-commits

https://github.com/jcsxky updated 
https://github.com/llvm/llvm-project/pull/83847

>From 043392e7b69e552ac5262df7ebf73e648844 Mon Sep 17 00:00:00 2001
From: huqizhi 
Date: Mon, 4 Mar 2024 21:51:07 +0800
Subject: [PATCH] [Clang][Sema] Allow access to a public template alias
 declaration that refers to friend's private nested type

---
 clang/docs/ReleaseNotes.rst |  2 ++
 clang/lib/Sema/SemaTemplate.cpp | 11 ---
 clang/test/SemaTemplate/PR25708.cpp | 20 
 3 files changed, 30 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/SemaTemplate/PR25708.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 88e552d5c46113..a49504b71ad18d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -351,6 +351,8 @@ Bug Fixes to C++ Support
   when one of the function had more specialized templates.
   Fixes (`#82509 `_)
   and (`#74494 `_)
+- Allow access to a public template alias declaration that refers to friend's
+  private nested type. (#GH25708).
 
 Bug Fixes to AST Handling
 ^
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index d62095558d0ffb..87a7eefc89690e 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -4343,9 +4343,14 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
 if (Inst.isInvalid())
   return QualType();
 
-CanonType = SubstType(Pattern->getUnderlyingType(),
-  TemplateArgLists, AliasTemplate->getLocation(),
-  AliasTemplate->getDeclName());
+std::optional SavedContext;
+if (!AliasTemplate->getDeclContext()->isFileContext())
+  SavedContext.emplace(*this, AliasTemplate->getDeclContext());
+
+CanonType =
+SubstType(Pattern->getUnderlyingType(), TemplateArgLists,
+  AliasTemplate->getLocation(), AliasTemplate->getDeclName());
+
 if (CanonType.isNull()) {
   // If this was enable_if and we failed to find the nested type
   // within enable_if in a SFINAE context, dig out the specific
diff --git a/clang/test/SemaTemplate/PR25708.cpp 
b/clang/test/SemaTemplate/PR25708.cpp
new file mode 100644
index 00..6a214fc6b43bc1
--- /dev/null
+++ b/clang/test/SemaTemplate/PR25708.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s
+// expected-no-diagnostics
+
+struct FooAccessor
+{
+template 
+using Foo = typename T::Foo;
+};
+
+class Type
+{
+friend struct FooAccessor;
+
+using Foo = int;
+};
+
+int main()
+{
+FooAccessor::Foo t;
+}

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HLSL] Implement `rsqrt` intrinsic (PR #84820)

2024-03-11 Thread Farzon Lotfi via cfe-commits

https://github.com/farzonl updated 
https://github.com/llvm/llvm-project/pull/84820

>From a46ecdee6356e744a80f3c29748e7c3482a89760 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi 
Date: Mon, 11 Mar 2024 15:17:35 -0400
Subject: [PATCH 1/2] [HLSL] Implement `rsqrt` intrinsic This change implements
 #70074 - `hlsl_intrinsics.h - add the rsqrt api - `DXIL.td` add the llvm
 intrinsic to DXIL op lowering map. - `Builtins.td` - add an hlsl builtin for
 rsqrt. - `CGBuiltin.cpp` add the ir generation for the rsqrt intrinsic. -
 `SemaChecking.cpp` - reuse the one arg float only  checks. -
 `IntrinsicsDirectX.td -add an `rsqrt` intrinsic.

---
 clang/include/clang/Basic/Builtins.td |  6 +++
 clang/lib/CodeGen/CGBuiltin.cpp   |  8 +++
 clang/lib/Headers/hlsl/hlsl_intrinsics.h  | 32 +++
 clang/lib/Sema/SemaChecking.cpp   |  1 +
 clang/test/CodeGenHLSL/builtins/rsqrt.hlsl| 53 +++
 clang/test/SemaHLSL/BuiltIns/dot-warning.ll   | 49 +
 .../test/SemaHLSL/BuiltIns/rsqrt-errors.hlsl  | 27 ++
 llvm/include/llvm/IR/IntrinsicsDirectX.td |  1 +
 llvm/lib/Target/DirectX/DXIL.td   |  3 ++
 llvm/test/CodeGen/DirectX/rsqrt.ll| 31 +++
 10 files changed, 211 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/builtins/rsqrt.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/dot-warning.ll
 create mode 100644 clang/test/SemaHLSL/BuiltIns/rsqrt-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/rsqrt.ll

diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 9c703377ca8d3e..de0cfb4e46b8bd 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4590,6 +4590,12 @@ def HLSLRcp : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLRSqrt : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_rsqrt"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 20c35757939152..d2c83a5e405f42 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18077,6 +18077,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
 /*ReturnType=*/Op0->getType(), Intrinsic::dx_rcp,
 ArrayRef{Op0}, nullptr, "dx.rcp");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
+Value *Op0 = EmitScalarExpr(E->getArg(0));
+if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+  llvm_unreachable("rsqrt operand must have a float representation");
+return Builder.CreateIntrinsic(
+/*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
+ArrayRef{Op0}, nullptr, "dx.rsqrt");
+  }
   }
   return nullptr;
 }
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h 
b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 45f8544392584e..71238a4f268ede 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1153,6 +1153,38 @@ double3 rcp(double3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
 double4 rcp(double4);
 
+//===--===//
+// rsqrt builtins
+//===--===//
+
+/// \fn T rsqrt(T x)
+/// \brief RReturns the reciprocal of the square root of the specified value \a
+/// x. \param x The specified input value.
+///
+/// This function uses the following formula: 1 / sqrt(x).
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half rsqrt(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half2 rsqrt(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half3 rsqrt(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half4 rsqrt(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float rsqrt(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float2 rsqrt(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float3 rsqrt(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float4 rsqrt(float4);
+
 
//===--===//
 // round builtins
 
//===--===//
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a5f42b630c3fa2..0dafff47ab4040 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5285,6 +5285,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *The

[clang] [llvm] [HLSL] Implement `rsqrt` intrinsic (PR #84820)

2024-03-11 Thread Farzon Lotfi via cfe-commits

https://github.com/farzonl updated 
https://github.com/llvm/llvm-project/pull/84820

>From a46ecdee6356e744a80f3c29748e7c3482a89760 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi 
Date: Mon, 11 Mar 2024 15:17:35 -0400
Subject: [PATCH 1/2] [HLSL] Implement `rsqrt` intrinsic This change implements
 #70074 - `hlsl_intrinsics.h - add the rsqrt api - `DXIL.td` add the llvm
 intrinsic to DXIL op lowering map. - `Builtins.td` - add an hlsl builtin for
 rsqrt. - `CGBuiltin.cpp` add the ir generation for the rsqrt intrinsic. -
 `SemaChecking.cpp` - reuse the one arg float only  checks. -
 `IntrinsicsDirectX.td -add an `rsqrt` intrinsic.

---
 clang/include/clang/Basic/Builtins.td |  6 +++
 clang/lib/CodeGen/CGBuiltin.cpp   |  8 +++
 clang/lib/Headers/hlsl/hlsl_intrinsics.h  | 32 +++
 clang/lib/Sema/SemaChecking.cpp   |  1 +
 clang/test/CodeGenHLSL/builtins/rsqrt.hlsl| 53 +++
 clang/test/SemaHLSL/BuiltIns/dot-warning.ll   | 49 +
 .../test/SemaHLSL/BuiltIns/rsqrt-errors.hlsl  | 27 ++
 llvm/include/llvm/IR/IntrinsicsDirectX.td |  1 +
 llvm/lib/Target/DirectX/DXIL.td   |  3 ++
 llvm/test/CodeGen/DirectX/rsqrt.ll| 31 +++
 10 files changed, 211 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/builtins/rsqrt.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/dot-warning.ll
 create mode 100644 clang/test/SemaHLSL/BuiltIns/rsqrt-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/rsqrt.ll

diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 9c703377ca8d3e..de0cfb4e46b8bd 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4590,6 +4590,12 @@ def HLSLRcp : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLRSqrt : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_rsqrt"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 20c35757939152..d2c83a5e405f42 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18077,6 +18077,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
 /*ReturnType=*/Op0->getType(), Intrinsic::dx_rcp,
 ArrayRef{Op0}, nullptr, "dx.rcp");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
+Value *Op0 = EmitScalarExpr(E->getArg(0));
+if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+  llvm_unreachable("rsqrt operand must have a float representation");
+return Builder.CreateIntrinsic(
+/*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
+ArrayRef{Op0}, nullptr, "dx.rsqrt");
+  }
   }
   return nullptr;
 }
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h 
b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 45f8544392584e..71238a4f268ede 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1153,6 +1153,38 @@ double3 rcp(double3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
 double4 rcp(double4);
 
+//===--===//
+// rsqrt builtins
+//===--===//
+
+/// \fn T rsqrt(T x)
+/// \brief RReturns the reciprocal of the square root of the specified value \a
+/// x. \param x The specified input value.
+///
+/// This function uses the following formula: 1 / sqrt(x).
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half rsqrt(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half2 rsqrt(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half3 rsqrt(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half4 rsqrt(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float rsqrt(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float2 rsqrt(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float3 rsqrt(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float4 rsqrt(float4);
+
 
//===--===//
 // round builtins
 
//===--===//
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a5f42b630c3fa2..0dafff47ab4040 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5285,6 +5285,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *The

[clang] [clang][modules] Headers meant to be included multiple times can be completely invisible in clang module builds (PR #83660)

2024-03-11 Thread Ian Anderson via cfe-commits

ian-twilightcoder wrote:

> To clarify a little bit
> 
> > [...] The "already included" state is global across all modules (which is 
> > necessary so that non-modular headers don't get compiled into multiple 
> > translation units and cause redeclaration errors).
> 
> The necessity isn't actually true. The same definition in multiple modules 
> shouldn't confuse clang as long as these definitions are identical. But this 
> is a side issue.

Sometimes it does confuse clang; at least I saw problems with a `typedef enum`
when I made an include-once header `textual`.

> To re-iterate what this change is about:
> 
> 1. `#import` implies a file is a single-include
> 2. Textual header in a module map implies a file is a multi-include (aka 
> re-entrant)
> 3. When we have both `#import` and the header marked as textual, `#import` 
> "wins", i.e. the file is single-include
> 4. You want to make that when we have both `#import` and a textual header, 
> textual should "win", i.e. the file should be multi-include
> 
> Is it correct?

That's correct. `#import` is an external source - often it comes from the users
of the header and not the author, and the users might not be consistent with
each other. `textual` comes from the author and is a much stronger indicator of
intent.

https://github.com/llvm/llvm-project/pull/83660
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][modules] Headers meant to be included multiple times can be completely invisible in clang module builds (PR #83660)

2024-03-11 Thread Volodymyr Sapsai via cfe-commits

vsapsai wrote:

To clarify a little bit
> [...] The "already included" state is global across all modules (which is 
> necessary so that non-modular headers don't get compiled into multiple 
> translation units and cause redeclaration errors).

The necessity isn't actually true. The same definition in multiple modules 
shouldn't confuse clang as long as these definitions are identical. But this is 
a side issue.

To re-iterate what this change is about:
1. `#import` implies a file is a single-include
2. Textual header in a module map implies a file is a multi-include (aka 
re-entrant)
3. When we have both `#import` and the header marked as textual, `#import` 
"wins", i.e. the file is single-include
4. You want to make it so that when we have both `#import` and a textual header,
textual should "win", i.e. the file should be multi-include

Is it correct?

https://github.com/llvm/llvm-project/pull/83660
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][modules] Headers meant to be included multiple times can be completely invisible in clang module builds (PR #83660)

2024-03-11 Thread Ian Anderson via cfe-commits

https://github.com/ian-twilightcoder updated 
https://github.com/llvm/llvm-project/pull/83660

>From 1cb3d459f3a9ae73ac98bf8c06b905d788be954f Mon Sep 17 00:00:00 2001
From: Ian Anderson 
Date: Fri, 1 Mar 2024 22:17:09 -0800
Subject: [PATCH] [clang][modules] Headers meant to be included multiple times
 can be completely invisible in clang module builds

Once a file has been `#import`'ed, it gets stamped as if it was `#pragma once`
and will not be re-entered, even on #include. This means that any errant
#import of a file designed to be included multiple times, such as <assert.h>,
will incorrectly mark it as include-once and break the multiple include
functionality. Normally this isn't a big problem, e.g. <assert.h> can't have
its NDEBUG mode changed after the first #import, but it is still mostly
functional. However, when clang modules are involved, this can cause the header
to be hidden entirely.

Objective-C code most often uses #import for everything, because it's required 
for most Objective-C headers to prevent double inclusion and redeclaration 
errors. (It's rare for Objective-C headers to use macro guards or `#pragma 
once`.) The problem arises when a submodule includes a multiple-include header. 
The "already included" state is global across all modules (which is necessary 
so that non-modular headers don't get compiled into multiple translation units 
and cause redeclaration errors). If another module or the main file #import's 
the same header, it becomes invisible from then on. If the original submodule 
is not imported, the include of the header will effectively do nothing and the 
header will be invisible. The only way to actually get the header's 
declarations is to somehow figure out which submodule consumed the header, and 
import that instead. That's basically impossible since it depends on exactly 
which modules were built in which order.

#import is a poor indicator of whether a header is actually include-once, as 
the #import is external to the header it applies to, and requires that all 
inclusions correctly and consistently use #import vs #include. When modules are 
enabled, consider a header marked `textual` in its module as a stronger 
indicator of multiple-include than #import's indication of include-once. This
will allow headers like <assert.h> to always be included when modules are
enabled, even if #import is erroneously used somewhere.
---
 clang/include/clang/Lex/HeaderSearch.h   |  26 ++-
 clang/lib/Lex/HeaderSearch.cpp   | 183 +--
 clang/lib/Serialization/ASTReader.cpp|   2 +-
 clang/test/Modules/builtin-import.mm |   2 +
 clang/test/Modules/import-textual-noguard.mm |   6 +-
 clang/test/Modules/import-textual.mm |   2 +
 clang/test/Modules/multiple-import.m |  43 +
 7 files changed, 201 insertions(+), 63 deletions(-)
 create mode 100644 clang/test/Modules/multiple-import.m

diff --git a/clang/include/clang/Lex/HeaderSearch.h 
b/clang/include/clang/Lex/HeaderSearch.h
index 705dcfa8aacc3f..4c9fb58fbd35ef 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -78,11 +78,19 @@ struct HeaderFileInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned External : 1;
 
-  /// Whether this header is part of a module.
+  /// Whether this header is part of and built with a module
+  /// (`isTextualModuleHeader` will be `false`).
   LLVM_PREFERRED_TYPE(bool)
   unsigned isModuleHeader : 1;
 
-  /// Whether this header is part of the module that we are building.
+  /// Whether this header is a textual header in a module (`isModuleHeader` 
will
+  /// be `false`).
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned isTextualModuleHeader : 1;
+
+  /// Whether this header is part of the module that we are building
+  /// (independent of `isModuleHeader` and `isTextualModuleHeader`, they can
+  /// both be `false`).
   LLVM_PREFERRED_TYPE(bool)
   unsigned isCompilingModuleHeader : 1;
 
@@ -128,13 +136,20 @@ struct HeaderFileInfo {
 
   HeaderFileInfo()
   : isImport(false), isPragmaOnce(false), DirInfo(SrcMgr::C_User),
-External(false), isModuleHeader(false), isCompilingModuleHeader(false),
-Resolved(false), IndexHeaderMapHeader(false), IsValid(false)  {}
+External(false), isModuleHeader(false), isTextualModuleHeader(false),
+isCompilingModuleHeader(false), Resolved(false),
+IndexHeaderMapHeader(false), IsValid(false) {}
 
   /// Retrieve the controlling macro for this header file, if
   /// any.
   const IdentifierInfo *
   getControllingMacro(ExternalPreprocessorSource *External);
+
+  /// Update the module membership bits based on the header role.
+  ///
+  /// isModuleHeader will potentially be set, but not cleared.
+  /// isTextualModuleHeader will be set or cleared based on the role update.
+  void mergeModuleMembership(ModuleMap::ModuleHeaderRole Role);
 };
 
 /// An external source of header file information, which may supply
@@ -522,6 +537,9 @@ class HeaderSearch {
  

[clang] [llvm] [HLSL] Implement `rsqrt` intrinsic (PR #84820)

2024-03-11 Thread Farzon Lotfi via cfe-commits


@@ -0,0 +1,27 @@
+
+// RUN: %clang_cc1 -finclude-default-header -triple 
dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm 
-disable-llvm-passes -verify -verify-ignore-unexpected
+
+float test_too_few_arg() {
+  return __builtin_hlsl_elementwise_rsqrt();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 
0}}
+}
+
+float2 test_too_many_arg(float2 p0) {
+  return __builtin_hlsl_elementwise_rsqrt(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 
2}}
+}
+
+float builtin_bool_to_float_type_promotion(bool p1) {
+  return __builtin_hlsl_elementwise_rsqrt(p1);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating 
point type (was 'bool')}}

farzonl wrote:

This is a good point; I can change the order of 
`CheckAllArgsHaveFloatRepresentation` and 
`PrepareBuiltinElementwiseMathOneArgCall`

https://github.com/llvm/llvm-project/pull/84820
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HLSL] Implement `rsqrt` intrinsic (PR #84820)

2024-03-11 Thread Farzon Lotfi via cfe-commits


@@ -0,0 +1,49 @@
+; ModuleID = 
'D:\projects\llvm-project\clang\test\SemaHLSL\BuiltIns\dot-warning.hlsl'

farzonl wrote:

this file shouldn't be in this pr. this was a mistake.

https://github.com/llvm/llvm-project/pull/84820
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-11 Thread Kees Cook via cfe-commits

kees wrote:

> There are currently over 200 separate unions using the work-around.

Specifically, this is what Linux uses for getting C99 flexible arrays in unions 
and alone in structs:

```
#define DECLARE_FLEX_ARRAY(TYPE, NAME)\
struct { \
struct { } __empty_ ## NAME; \
TYPE NAME[]; \
}
```

The conversion from the "struct hack" to C99 flexible arrays is complete, 
except for this wart.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HLSL] Implement `rsqrt` intrinsic (PR #84820)

2024-03-11 Thread Damyan Pepper via cfe-commits


@@ -0,0 +1,27 @@
+
+// RUN: %clang_cc1 -finclude-default-header -triple 
dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm 
-disable-llvm-passes -verify -verify-ignore-unexpected
+
+float test_too_few_arg() {
+  return __builtin_hlsl_elementwise_rsqrt();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 
0}}
+}
+
+float2 test_too_many_arg(float2 p0) {
+  return __builtin_hlsl_elementwise_rsqrt(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 
2}}
+}
+
+float builtin_bool_to_float_type_promotion(bool p1) {
+  return __builtin_hlsl_elementwise_rsqrt(p1);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating 
point type (was 'bool')}}

damyanp wrote:

"integer" - is this true?  I thought it only worked with things that were 
float-like.

https://github.com/llvm/llvm-project/pull/84820
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HLSL] Implement `rsqrt` intrinsic (PR #84820)

2024-03-11 Thread Damyan Pepper via cfe-commits


@@ -0,0 +1,49 @@
+; ModuleID = 
'D:\projects\llvm-project\clang\test\SemaHLSL\BuiltIns\dot-warning.hlsl'

damyanp wrote:

Maybe naive question...but isn't this about the `dot` intrinsic?

https://github.com/llvm/llvm-project/pull/84820
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HLSL] Implement `rsqrt` intrinsic (PR #84820)

2024-03-11 Thread Damyan Pepper via cfe-commits


@@ -1153,6 +1153,38 @@ double3 rcp(double3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
 double4 rcp(double4);
 
+//===--===//
+// rsqrt builtins
+//===--===//
+
+/// \fn T rsqrt(T x)
+/// \brief RReturns the reciprocal of the square root of the specified value \a

damyanp wrote:

`Rreturns` - typo?

https://github.com/llvm/llvm-project/pull/84820
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HLSL] Implement `rsqrt` intrinsic (PR #84820)

2024-03-11 Thread Damyan Pepper via cfe-commits


@@ -1153,6 +1153,38 @@ double3 rcp(double3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
 double4 rcp(double4);
 
+//===--===//
+// rsqrt builtins
+//===--===//
+
+/// \fn T rsqrt(T x)
+/// \brief RReturns the reciprocal of the square root of the specified value \a

damyanp wrote:

Comparing this with the doc for rcp above, wonder if it should be more like:

`Returns the reciprocal of the square root of the specified value.  ie 1 / 
sqrt(\a x).`



https://github.com/llvm/llvm-project/pull/84820
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Comments] Add argument parsing for @throw @throws @exception (PR #84726)

2024-03-11 Thread via cfe-commits


@@ -149,6 +191,76 @@ class TextTokenRetokenizer {
 addToken();
   }
 
+  /// Extract a type argument
+  bool lexDataType(Token &Tok) {
+if (isEnd())
+  return false;
+Position SavedPos = Pos;
+consumeWhitespace();
+SmallString<32> NextToken;
+SmallString<32> WordText;
+const char *WordBegin = Pos.BufferPtr;
+SourceLocation Loc = getSourceLocation();
+StringRef ConstVal = StringRef("const");

hdoc wrote:

Yes, it should. We'll update the PR to support that. Thank you for noticing and 
mentioning this.

Likewise in the future we'll probably want to add support for `restrict`. It's 
not necessary for this PR since `throw/throws/exception` is only useful in C++ 
but when parsing Doxygen comments in C codebases we'll want to have the option 
of supporting that.

https://github.com/llvm/llvm-project/pull/84726
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-11 Thread Kees Cook via cfe-commits

kees wrote:

> C99 added flexible array members, and the C99 rationale says the feature was 
> added specifically as a replacement for the common idiom known as the "struct 
> hack" for creating a structure containing a variable-size array.

This is my reasoning as well -- we (Linux dev hat on) have been converting a 
giant codebase from the "struct hack" to C99 flexible arrays, but the Standard 
(IMO) made a mistake in not recognizing the need for flexible arrays to be in 
unions. This was (and is) a well-used code pattern for the "struct hack" that 
has persisted for decades. Now, I'm not here to argue for a change to the 
Standard (that's a much different effort), but rather to argue that the 
existing "struct hack" _extensions_ be made to recognize the C99 flexible array 
as well, since otherwise it is not possible to cleanly update existing code.

The fact that Clang _already_ supports C99 flex arrays in unions (through other 
GNU extension) should serve as evidence of its sensible direct extension to C99 
flexible arrays. The "not in unions" check can already be evaded by using the 
"empty struct" GNU extension. So, to be clear: I'm talking about making the 
_existing_ extensions be uniformly applied. I don't want to be distracted by 
whether or not this should be part of the C Standard -- I would like Clang to 
do what it already does but without requiring the pointless syntactic 
obfuscation currently in use by Linux.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Comments] Add argument parsing for @throw @throws @exception (PR #84726)

2024-03-11 Thread via cfe-commits

https://github.com/hdoc updated https://github.com/llvm/llvm-project/pull/84726

>From ec3f444913d9162de4494cdb09b336b1b00380fa Mon Sep 17 00:00:00 2001
From: hdoc 
Date: Mon, 11 Mar 2024 01:13:25 -0700
Subject: [PATCH 1/2] Comment parsing: add argument parsing for @throw @throws
 @exception

Doxygen allows for the @throw, @throws, and @exception commands to
have an attached argument indicating the type being thrown. Currently,
Clang's AST parsing doesn't support parsing out this argument from doc
comments. The result is missing compatibility with Doxygen.

We would find it helpful if the AST exposed these thrown types as
BlockCommandComment arguments so that we could generate better
documentation.

This PR implements parsing of arguments for the @throw, @throws, and
@exception commands. Each command can only have one argument, matching
the semantics of Doxygen. We have also added unit tests to validate
the functionality.
---
 clang/include/clang/AST/CommentCommands.td |   6 +-
 clang/include/clang/AST/CommentParser.h|   3 +
 clang/lib/AST/CommentParser.cpp| 133 
 clang/unittests/AST/CommentParser.cpp  | 235 -
 4 files changed, 373 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/AST/CommentCommands.td 
b/clang/include/clang/AST/CommentCommands.td
index e839031752cdd8..06b2fa9b5531c6 100644
--- a/clang/include/clang/AST/CommentCommands.td
+++ b/clang/include/clang/AST/CommentCommands.td
@@ -132,9 +132,9 @@ def Tparam : BlockCommand<"tparam"> { let IsTParamCommand = 
1; }
 // HeaderDoc command for template parameter documentation.
 def Templatefield : BlockCommand<"templatefield"> { let IsTParamCommand = 1; }
 
-def Throws: BlockCommand<"throws"> { let IsThrowsCommand = 1; }
-def Throw : BlockCommand<"throw"> { let IsThrowsCommand = 1; }
-def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; }
+def Throws: BlockCommand<"throws"> { let IsThrowsCommand = 1; let NumArgs 
= 1; }
+def Throw : BlockCommand<"throw"> { let IsThrowsCommand = 1; let NumArgs = 
1; }
+def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; let 
NumArgs = 1;}
 
 def Deprecated : BlockCommand<"deprecated"> {
   let IsEmptyParagraphAllowed = 1;
diff --git a/clang/include/clang/AST/CommentParser.h 
b/clang/include/clang/AST/CommentParser.h
index e11e818b1af0a1..5884a25d007851 100644
--- a/clang/include/clang/AST/CommentParser.h
+++ b/clang/include/clang/AST/CommentParser.h
@@ -100,6 +100,9 @@ class Parser {
   ArrayRef
   parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
 
+  ArrayRef
+  parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
+
   BlockCommandComment *parseBlockCommand();
   InlineCommandComment *parseInlineCommand();
 
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index 8adfd85d0160c3..c70fa1b05cb241 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -75,6 +75,25 @@ class TextTokenRetokenizer {
 return *Pos.BufferPtr;
   }
 
+  char peekNext(unsigned offset) const {
+assert(!isEnd());
+assert(Pos.BufferPtr != Pos.BufferEnd);
+if (Pos.BufferPtr + offset <= Pos.BufferEnd) {
+  return *(Pos.BufferPtr + offset);
+} else {
+  return '\0';
+}
+  }
+
+  void peekNextToken(SmallString<32> &WordText) const {
+unsigned offset = 1;
+char C = peekNext(offset++);
+while (!isWhitespace(C) && C != '\0') {
+  WordText.push_back(C);
+  C = peekNext(offset++);
+}
+  }
+
   void consumeChar() {
 assert(!isEnd());
 assert(Pos.BufferPtr != Pos.BufferEnd);
@@ -89,6 +108,29 @@ class TextTokenRetokenizer {
 }
   }
 
+  /// Extract a template type
+  bool lexTemplateType(SmallString<32> &WordText) {
+unsigned IncrementCounter = 0;
+while (!isEnd()) {
+  const char C = peek();
+  WordText.push_back(C);
+  consumeChar();
+  switch (C) {
+  default:
+break;
+  case '<': {
+IncrementCounter++;
+  } break;
+  case '>': {
+IncrementCounter--;
+if (!IncrementCounter)
+  return true;
+  } break;
+  }
+}
+return false;
+  }
+
   /// Add a token.
   /// Returns true on success, false if there are no interesting tokens to
   /// fetch from lexer.
@@ -149,6 +191,76 @@ class TextTokenRetokenizer {
 addToken();
   }
 
+  /// Extract a type argument
+  bool lexDataType(Token &Tok) {
+if (isEnd())
+  return false;
+Position SavedPos = Pos;
+consumeWhitespace();
+SmallString<32> NextToken;
+SmallString<32> WordText;
+const char *WordBegin = Pos.BufferPtr;
+SourceLocation Loc = getSourceLocation();
+StringRef ConstVal = StringRef("const");
+bool ConstPointer = false;
+
+while (!isEnd()) {
+  const char C = peek();
+  if (!isWhitespace(C)) {
+if (C == '<') {
+  if (!lexTemplateType(WordText))
+return fals

[clang] 6397f22 - [clang] Fix test after #84214

2024-03-11 Thread Vitaly Buka via cfe-commits

Author: Vitaly Buka
Date: 2024-03-11T15:33:32-07:00
New Revision: 6397f223c456ce5a0cc246cd81673794a4860fd1

URL: 
https://github.com/llvm/llvm-project/commit/6397f223c456ce5a0cc246cd81673794a4860fd1
DIFF: 
https://github.com/llvm/llvm-project/commit/6397f223c456ce5a0cc246cd81673794a4860fd1.diff

LOG: [clang] Fix test after #84214

Added: 


Modified: 
clang/test/CodeGen/remote-traps.c

Removed: 




diff  --git a/clang/test/CodeGen/remote-traps.c 
b/clang/test/CodeGen/remote-traps.c
index f053d1bd157f80..6751afb96d25f2 100644
--- a/clang/test/CodeGen/remote-traps.c
+++ b/clang/test/CodeGen/remote-traps.c
@@ -1,15 +1,15 @@
 // RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow 
-fsanitize-trap=signed-integer-overflow %s -o - | FileCheck %s 
 // RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow 
-fsanitize-trap=signed-integer-overflow -mllvm -clang-remove-traps -mllvm 
-remove-traps-random-rate=1 %s -o - | FileCheck %s --implicit-check-not="call 
void @llvm.ubsantrap" --check-prefixes=REMOVE
 
-int f(int x) {
+int test(int x) {
   return x + 123;
 }
 
-// CHECK-LABEL: define dso_local noundef i32 @f(
+// CHECK-LABEL: define {{.*}}i32 @test(
 // CHECK: call { i32, i1 } @llvm.sadd.with.overflow.i32(
 // CHECK: trap:
 // CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
 // CHECK-NEXT: unreachable
 
-// REMOVE-LABEL: define dso_local noundef i32 @f(
+// REMOVE-LABEL: define {{.*}}i32 @test(
 // REMOVE: call { i32, i1 } @llvm.sadd.with.overflow.i32(



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Add optional pass to remove UBSAN traps using PGO (PR #84214)

2024-03-11 Thread Vitaly Buka via cfe-commits

vitalybuka wrote:

> @vitalybuka the test you added `remote-traps.c` seems to be failing on a few 
> buildbots, can you take a look?
> 
> * https://lab.llvm.org/buildbot/#/builders/231/builds/21492
> * https://lab.llvm.org/buildbot/#/builders/280/builds/6

looking

https://github.com/llvm/llvm-project/pull/84214
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][arm64e] Add methods and data members to Address, which are needed to authenticate signed pointers (PR #67454)

2024-03-11 Thread Anton Korobeynikov via cfe-commits

asl wrote:

> I'm not sure `extractRawPointerFromAddress` conveys the fact that the 
> function might do code-gen instead of just returning some pointer. I wonder 
> if there's a better name.
> 
> `computeRawPointerFromAddress` `genRawPointerFromAddress` 
> `generateRawPointerFromAddress` `codeGenRawPointerFromAddress`
> 
> Thoughts?

I do not have a particular preference, but I probably like `compute` / `gen` 
slightly more.

https://github.com/llvm/llvm-project/pull/67454
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][CodeGen] Improved codegen for combined loop directives (PR #72417)

2024-03-11 Thread David Pagan via cfe-commits

ddpagan wrote:

After some additional discussion with Alexey offline, he concluded that the 
current changes are okay, specifically for this reason:

_"Then I realized that actually it does not require AST nodes building. In this 
case, this helper class should be moved to CodeGenStmt and hidden in the 
anonymous namespace. But you also need to use it in CodeGenModule. In this case 
better to use a flag in statement, as you have it right now. I.e. having this 
analysis in Sema looks good_"


https://github.com/llvm/llvm-project/pull/72417
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HLSL] Add -HV option translation to clang-dxc.exe (PR #83938)

2024-03-11 Thread Chris B via cfe-commits


@@ -69,6 +69,16 @@ LangStandard::Kind LangStandard::getLangKind(StringRef Name) 
{
   .Default(lang_unspecified);
 }
 
+LangStandard::Kind LangStandard::getHLSLLangKind(StringRef Name) {
+  return llvm::StringSwitch(Name)
+  .Case("2016", LangStandard::lang_hlsl2016)
+  .Case("2017", LangStandard::lang_hlsl2017)
+  .Case("2018", LangStandard::lang_hlsl2018)
+  .Case("2021", LangStandard::lang_hlsl2021)
+  .Case("202x", LangStandard::lang_hlsl202x)
+  .Default(LangStandard::lang_unspecified);
+}

llvm-beanz wrote:

This function isn't needed if you use the Options Enum marshalling that I 
suggested.

https://github.com/llvm/llvm-project/pull/83938
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang-tools-extra] [flang] [lld] [llvm] [flang][clang] Add Visibility specific help text for options (PR #81869)

2024-03-11 Thread Andrzej Warzyński via cfe-commits

https://github.com/banach-space edited 
https://github.com/llvm/llvm-project/pull/81869
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [clang][UBSan] Add implicit conversion check for bitfields (PR #75481)

2024-03-11 Thread Eli Friedman via cfe-commits


@@ -5571,11 +5571,52 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const 
BinaryOperator *E) {
   break;
 }
 
-RValue RV = EmitAnyExpr(E->getRHS());
+llvm::Value *Previous = nullptr;

efriedma-quic wrote:

Can we de-duplicate this code with the corresponding code in CGExprScalar, 
similar to the way EmitCompoundAssignmentLValue works?

https://github.com/llvm/llvm-project/pull/75481
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [clang][UBSan] Add implicit conversion check for bitfields (PR #75481)

2024-03-11 Thread Eli Friedman via cfe-commits

https://github.com/efriedma-quic commented:

I'm a bit concerned about the lack of C++ testcases, since you're making 
changes to some C++-only codepaths.

https://github.com/llvm/llvm-project/pull/75481
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [clang][UBSan] Add implicit conversion check for bitfields (PR #75481)

2024-03-11 Thread Eli Friedman via cfe-commits

https://github.com/efriedma-quic edited 
https://github.com/llvm/llvm-project/pull/75481
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][arm64e] Add methods and data members to Address, which are needed to authenticate signed pointers (PR #67454)

2024-03-11 Thread Akira Hatanaka via cfe-commits

ahatanak wrote:

I'm not sure `extractRawPointerFromAddress` conveys the fact that the function 
might do code-gen instead of just returning some pointer. I wonder if there's a 
better name.

`computeRawPointerFromAddress`
`genRawPointerFromAddress`
`generateRawPointerFromAddress`
`codeGenRawPointerFromAddress`

Thoughts?

https://github.com/llvm/llvm-project/pull/67454
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [asan][windows] Eliminate the static asan runtime on windows (PR #81677)

2024-03-11 Thread Charlie Barto via cfe-commits

barcharcraz wrote:

> > @vitalybuka Here's a more detailed explanation on the motivations behind 
> > this change from @amyw-msft, who is the original author of these changes on 
> > our side. (And who I'll add as a co-author using fixup commits)
> > https://devblogs.microsoft.com/cppblog/msvc-address-sanitizer-one-dll-for-all-runtime-configurations/
> 
> Thanks. Would be possible to extract summary paragraph and put on top of the 
> main patch, with this URL.
> 
> Also, to submit, it would be nice to do with smaller patches with some time 
> between them to let bots test them separately. Or it's going to be annoying 
> to revert/reland such large patch.
> 
> Almost every item from description looks like a candidate for a patch.
> 
> Relaxing `// CHECK: #{{[1-3]}}` could be done in a separate patch.

OK I'll extract at least the first few commits into their own PR, along with 
the test relaxations. The items in the description are harder to extract 
without breaking all the tests.

https://github.com/llvm/llvm-project/pull/81677
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang] Correct __builtin_dynamic_object_size for subobject types (PR #83204)

2024-03-11 Thread Bill Wendling via cfe-commits


@@ -1052,11 +1053,143 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const 
Expr *E, unsigned Type,
   return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, 
IsSigned));
 }
 
+namespace {
+
+/// SubobjectFinder - A simple visitor to find the "sub-object" pointed to by a
+/// __builtin_dynamic_object_size call. Information gathered from the
+/// sub-object is used by the back-end to determine the correct size when the
+/// 'TYPE' of the __bdos call has the least significant bit set (i.e. asking
+/// for the sub-object size).
+///
+/// The expectation is that we'll eventually hit one of three expression types:
+///
+///   1. DeclRefExpr - This is the expression for the base of the structure.
+///   2. MemberExpr - This is the field in the structure.
+///   3. CompoundLiteralExpr - This is for people who create something
+///  heretical like (struct foo has a flexible array member):
+///
+///(struct foo){ 1, 2 }.blah[idx];
+///
+/// All other expressions can be correctly handled with the current code.
+struct SubobjectFinder
+: public ConstStmtVisitor {
+  SubobjectFinder() = default;
+
+  
//======//
+  //Visitor Methods
+  
//======//
+
+  const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+  const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
+  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+  const Expr *VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
+return E;
+  }
+
+  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+return Visit(E->getBase());
+  }
+  const Expr *VisitCastExpr(const CastExpr *E) {

bwendling wrote:

It seems to. See https://godbolt.org/z/4xaY4191o for an example (the `&((char 
*)&var.z.a)[argc]` example looks through them).

https://github.com/llvm/llvm-project/pull/83204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang] Correct __builtin_dynamic_object_size for subobject types (PR #83204)

2024-03-11 Thread Bill Wendling via cfe-commits


@@ -26996,18 +26996,38 @@ class, structure, array, or other object.
 Arguments:
 ""
 
-The ``llvm.objectsize`` intrinsic takes four arguments. The first argument is a
-pointer to or into the ``object``. The second argument determines whether
-``llvm.objectsize`` returns 0 (if true) or -1 (if false) when the object size 
is
-unknown. The third argument controls how ``llvm.objectsize`` acts when ``null``
-in address space 0 is used as its pointer argument. If it's ``false``,
-``llvm.objectsize`` reports 0 bytes available when given ``null``. Otherwise, 
if
-the ``null`` is in a non-zero address space or if ``true`` is given for the
-third argument of ``llvm.objectsize``, we assume its size is unknown. The 
fourth
-argument to ``llvm.objectsize`` determines if the value should be evaluated at
-runtime.
+The ``llvm.objectsize`` intrinsic takes six arguments:
+
+- The first argument is a pointer to or into the ``object``.
+- The second argument controls which value to return when the size is unknown:
+
+  - If it's ``false``, ``llvm.objectsize`` returns ``-1``.
+  - If it's ``true``, ``llvm.objectsize`` returns ``0``.
+
+- The third argument controls how ``llvm.objectsize`` acts when ``null`` in
+  address space 0 is used as its pointer argument:
+
+  - If it's ``false``, ``llvm.objectsize`` reports 0 bytes available when given
+``null``.
+  - If it's ``true``, or the ``null`` pointer is in a non-zero address space,
+the size is assumed to be unknown.
+
+- The fourth argument to ``llvm.objectsize`` determines if the value should be
+  evaluated at runtime.
+- The fifth argument controls which size ``llvm.objectsize`` returns:
+
+  - If it's ``false``, ``llvm.objectsize`` returns the size of the closest
+surrounding subobject.
+  - If it's ``true``, ``llvm.objectsize`` returns the size of the whole object.
+
+- If non-zero, the sixth and seventh arguments encode the size and offset
+  information, respectively, of the original subobject's layout and is used
+  when the fifth argument is ``false``.
+- The seventh argument encodes the offset information of the original
+  subobject's layout and is used when the fifth argument is ``false``.

bwendling wrote:

A lot of your review seems to be based on this differing of opinion of what to 
do when indexing outside of the object currently being pointed to. Let's get 
this sorted out before I make changes...

https://github.com/llvm/llvm-project/pull/83204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Fix incorrect line numbers with -E and raw string (#47577) (PR #77021)

2024-03-11 Thread Steve Cornett via cfe-commits

stevecor wrote:

Ping

https://github.com/llvm/llvm-project/pull/77021
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang] Correct __builtin_dynamic_object_size for subobject types (PR #83204)

2024-03-11 Thread Bill Wendling via cfe-commits


@@ -26996,18 +26996,38 @@ class, structure, array, or other object.
 Arguments:
 ""
 
-The ``llvm.objectsize`` intrinsic takes four arguments. The first argument is a
-pointer to or into the ``object``. The second argument determines whether
-``llvm.objectsize`` returns 0 (if true) or -1 (if false) when the object size 
is
-unknown. The third argument controls how ``llvm.objectsize`` acts when ``null``
-in address space 0 is used as its pointer argument. If it's ``false``,
-``llvm.objectsize`` reports 0 bytes available when given ``null``. Otherwise, 
if
-the ``null`` is in a non-zero address space or if ``true`` is given for the
-third argument of ``llvm.objectsize``, we assume its size is unknown. The 
fourth
-argument to ``llvm.objectsize`` determines if the value should be evaluated at
-runtime.
+The ``llvm.objectsize`` intrinsic takes six arguments:
+
+- The first argument is a pointer to or into the ``object``.
+- The second argument controls which value to return when the size is unknown:
+
+  - If it's ``false``, ``llvm.objectsize`` returns ``-1``.
+  - If it's ``true``, ``llvm.objectsize`` returns ``0``.
+
+- The third argument controls how ``llvm.objectsize`` acts when ``null`` in
+  address space 0 is used as its pointer argument:
+
+  - If it's ``false``, ``llvm.objectsize`` reports 0 bytes available when given
+``null``.
+  - If it's ``true``, or the ``null`` pointer is in a non-zero address space,
+the size is assumed to be unknown.
+
+- The fourth argument to ``llvm.objectsize`` determines if the value should be
+  evaluated at runtime.
+- The fifth argument controls which size ``llvm.objectsize`` returns:
+
+  - If it's ``false``, ``llvm.objectsize`` returns the size of the closest
+surrounding subobject.
+  - If it's ``true``, ``llvm.objectsize`` returns the size of the whole object.
+
+- If non-zero, the sixth and seventh arguments encode the size and offset
+  information, respectively, of the original subobject's layout and is used
+  when the fifth argument is ``false``.
+- The seventh argument encodes the offset information of the original
+  subobject's layout and is used when the fifth argument is ``false``.

bwendling wrote:

> I think the information you're passing in here isn't quite what we'd want. If 
> I'm reading the code correctly, the offset you're passing in is the field 
> offset relative to the immediately-enclosing record type, which doesn't give 
> us any information about either where the pointer is within the subobject, or 
> where the subobject is within the complete object, so this doesn't seem like 
> it can be enough information to produce a correct result.

That's the information which leads to the correct calculation. If you have a 
pointer like this:

```c
struct S {
  int a;
  char c[234];
  int b;
};

void foo(struct S *ptr) {
  size_t x = __builtin_dynamic_object_size(ptr->a[22], 1);
/* ... */
}
```

the value of `x` should be `0`. See https://godbolt.org/z/4xaY4191o for a list 
of examples that show this behavior (at least in GCC). Notice that this applies 
for the sub-object type only. If the __bdos value is `0`, then your behavior is 
the correct behavior.
   

https://github.com/llvm/llvm-project/pull/83204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][deps] Fix dependency scanning with -working-directory (PR #84525)

2024-03-11 Thread Jan Svoboda via cfe-commits

https://github.com/jansvoboda11 approved this pull request.

LGTM, thanks!

https://github.com/llvm/llvm-project/pull/84525
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang] Correct __builtin_dynamic_object_size for subobject types (PR #83204)

2024-03-11 Thread Richard Smith via cfe-commits


@@ -1052,11 +1053,143 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const 
Expr *E, unsigned Type,
   return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, 
IsSigned));
 }
 
+namespace {
+
+/// SubobjectFinder - A simple visitor to find the "sub-object" pointed to by a
+/// __builtin_dynamic_object_size call. Information gathered from the
+/// sub-object is used by the back-end to determine the correct size when the
+/// 'TYPE' of the __bdos call has the least significant bit set (i.e. asking
+/// for the sub-object size).
+///
+/// The expectation is that we'll eventually hit one of three expression types:
+///
+///   1. DeclRefExpr - This is the expression for the base of the structure.
+///   2. MemberExpr - This is the field in the structure.
+///   3. CompoundLiteralExpr - This is for people who create something
+///  heretical like (struct foo has a flexible array member):
+///
+///(struct foo){ 1, 2 }.blah[idx];
+///
+/// All other expressions can be correctly handled with the current code.
+struct SubobjectFinder
+: public ConstStmtVisitor {
+  SubobjectFinder() = default;
+
+  
//======//
+  //Visitor Methods
+  
//======//
+
+  const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+  const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
+  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+  const Expr *VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
+return E;
+  }
+
+  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+return Visit(E->getBase());
+  }
+  const Expr *VisitCastExpr(const CastExpr *E) {

zygoloid wrote:

Does GCC look through explicit casts? I wonder if this should be restricted to 
`ImplicitCastExpr`s.

https://github.com/llvm/llvm-project/pull/83204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang] Correct __builtin_dynamic_object_size for subobject types (PR #83204)

2024-03-11 Thread Richard Smith via cfe-commits


@@ -1052,11 +1053,143 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const 
Expr *E, unsigned Type,
   return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, 
IsSigned));
 }
 
+namespace {
+
+/// SubobjectFinder - A simple visitor to find the "sub-object" pointed to by a
+/// __builtin_dynamic_object_size call. Information gathered from the
+/// sub-object is used by the back-end to determine the correct size when the
+/// 'TYPE' of the __bdos call has the least significant bit set (i.e. asking
+/// for the sub-object size).
+///
+/// The expectation is that we'll eventually hit one of three expression types:
+///
+///   1. DeclRefExpr - This is the expression for the base of the structure.
+///   2. MemberExpr - This is the field in the structure.
+///   3. CompoundLiteralExpr - This is for people who create something
+///  heretical like (struct foo has a flexible array member):
+///
+///(struct foo){ 1, 2 }.blah[idx];
+///
+/// All other expressions can be correctly handled with the current code.
+struct SubobjectFinder
+: public ConstStmtVisitor {
+  SubobjectFinder() = default;
+
+  
//======//
+  //Visitor Methods
+  
//======//
+
+  const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+  const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
+  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+  const Expr *VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
+return E;
+  }
+
+  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+return Visit(E->getBase());
+  }
+  const Expr *VisitCastExpr(const CastExpr *E) {
+return Visit(E->getSubExpr());
+  }
+  const Expr *VisitParenExpr(const ParenExpr *E) {
+return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
+return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
+return Visit(E->getSubExpr());
+  }

zygoloid wrote:

I think you'll need to be more careful when walking through address-of / 
dereferences -- the set of things you should step over when traversing a 
pointer to an object is different from the set of things you should step over 
when traversing an object lvalue. For example, the bounds to use for 
`*p->member` will be computed as the bounds of `member`, which isn't correct. I 
think you could address this by either having separate traversals for pointers 
versus lvalues, or by avoiding (for example) stepping through lvalue-to-rvalue 
conversions when stepping over `CastExpr`s -- and in fact, the latter seems 
like a good idea in general, given that a `CastExpr` could do pretty much 
anything to the pointer / lvalue. In general, I think it's only really safe to 
step over casts that are a no-op for address purposes. Bitcasts seem OK, 
address space conversions seem OK, etc. but a lot of cast kinds are not going 
to be reasonable to step over.

https://github.com/llvm/llvm-project/pull/83204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang] Correct __builtin_dynamic_object_size for subobject types (PR #83204)

2024-03-11 Thread Richard Smith via cfe-commits


@@ -1052,11 +1053,143 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const 
Expr *E, unsigned Type,
   return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, 
IsSigned));
 }
 
+namespace {
+
+/// SubobjectFinder - A simple visitor to find the "sub-object" pointed to by a
+/// __builtin_dynamic_object_size call. Information gathered from the
+/// sub-object is used by the back-end to determine the correct size when the
+/// 'TYPE' of the __bdos call has the least significant bit set (i.e. asking
+/// for the sub-object size).
+///
+/// The expectation is that we'll eventually hit one of three expression types:
+///
+///   1. DeclRefExpr - This is the expression for the base of the structure.
+///   2. MemberExpr - This is the field in the structure.
+///   3. CompoundLiteralExpr - This is for people who create something
+///  heretical like (struct foo has a flexible array member):
+///
+///(struct foo){ 1, 2 }.blah[idx];
+///
+/// All other expressions can be correctly handled with the current code.
+struct SubobjectFinder
+: public ConstStmtVisitor {
+  SubobjectFinder() = default;
+
+  
//======//
+  //Visitor Methods
+  
//======//
+
+  const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+  const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
+  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+  const Expr *VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
+return E;
+  }
+
+  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+return Visit(E->getBase());
+  }
+  const Expr *VisitCastExpr(const CastExpr *E) {
+return Visit(E->getSubExpr());
+  }
+  const Expr *VisitParenExpr(const ParenExpr *E) {
+return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
+return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
+return Visit(E->getSubExpr());
+  }
+};
+
+} // end anonymous namespace
+
+/// getFieldInfo - Gather the size and offset of the field \p VD in \p RD.
+static std::pair getFieldInfo(CodeGenFunction &CGF,
+  const RecordDecl *RD,
+  const ValueDecl *VD,
+  uint64_t Offset = 0) {
+  if (!RD)
+return std::make_pair(0, 0);
+
+  ASTContext &Ctx = CGF.getContext();
+  const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
+  unsigned FieldNo = 0;
+
+  for (const Decl *D : RD->decls()) {
+if (const auto *Record = dyn_cast(D)) {
+  std::pair Res =
+  getFieldInfo(CGF, Record->getDefinition(), VD,
+   Offset + Layout.getFieldOffset(FieldNo));
+  if (Res.first != 0)
+return Res;
+  continue;
+}
+
+if (const auto *FD = dyn_cast(D); FD == VD) {
+  Offset += Layout.getFieldOffset(FieldNo);
+  return 
std::make_pair(Ctx.getTypeSizeInChars(FD->getType()).getQuantity(),
+Ctx.toCharUnitsFromBits(Offset).getQuantity());
+}
+
+if (isa(D))
+  ++FieldNo;
+  }

zygoloid wrote:

Recursively looping through the fields is not necessary: this function 
only succeeds when `VD` is a `FieldDecl`, so you can `dyn_cast` it to that 
type, then get the enclosing `DeclContext` to find the record, and use 
`FieldDecl::getFieldIndex` to find the field number.

https://github.com/llvm/llvm-project/pull/83204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang] Correct __builtin_dynamic_object_size for subobject types (PR #83204)

2024-03-11 Thread Richard Smith via cfe-commits


@@ -1052,11 +1053,143 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const 
Expr *E, unsigned Type,
   return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, 
IsSigned));
 }
 
+namespace {
+
+/// SubobjectFinder - A simple visitor to find the "sub-object" pointed to by a
+/// __builtin_dynamic_object_size call. Information gathered from the
+/// sub-object is used by the back-end to determine the correct size when the
+/// 'TYPE' of the __bdos call has the least significant bit set (i.e. asking
+/// for the sub-object size).
+///
+/// The expectation is that we'll eventually hit one of three expression types:
+///
+///   1. DeclRefExpr - This is the expression for the base of the structure.
+///   2. MemberExpr - This is the field in the structure.
+///   3. CompoundLiteralExpr - This is for people who create something
+///  heretical like (struct foo has a flexible array member):
+///
+///(struct foo){ 1, 2 }.blah[idx];
+///
+/// All other expressions can be correctly handled with the current code.
+struct SubobjectFinder
+: public ConstStmtVisitor {
+  SubobjectFinder() = default;
+
+  
//======//
+  //Visitor Methods
+  
//======//
+
+  const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+  const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
+  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+  const Expr *VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
+return E;
+  }
+
+  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+return Visit(E->getBase());
+  }
+  const Expr *VisitCastExpr(const CastExpr *E) {
+return Visit(E->getSubExpr());
+  }
+  const Expr *VisitParenExpr(const ParenExpr *E) {
+return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
+return Visit(E->getSubExpr());
+  }
+  const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
+return Visit(E->getSubExpr());
+  }
+};
+
+} // end anonymous namespace
+
+/// getFieldInfo - Gather the size and offset of the field \p VD in \p RD.
+static std::pair getFieldInfo(CodeGenFunction &CGF,
+  const RecordDecl *RD,
+  const ValueDecl *VD,
+  uint64_t Offset = 0) {
+  if (!RD)
+return std::make_pair(0, 0);
+
+  ASTContext &Ctx = CGF.getContext();
+  const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
+  unsigned FieldNo = 0;
+
+  for (const Decl *D : RD->decls()) {
+if (const auto *Record = dyn_cast(D)) {
+  std::pair Res =
+  getFieldInfo(CGF, Record->getDefinition(), VD,
+   Offset + Layout.getFieldOffset(FieldNo));
+  if (Res.first != 0)
+return Res;
+  continue;
+}
+
+if (const auto *FD = dyn_cast(D); FD == VD) {
+  Offset += Layout.getFieldOffset(FieldNo);
+  return 
std::make_pair(Ctx.getTypeSizeInChars(FD->getType()).getQuantity(),
+Ctx.toCharUnitsFromBits(Offset).getQuantity());
+}
+
+if (isa(D))
+  ++FieldNo;
+  }
+
+  return std::make_pair(0, 0);
+}
+
+/// getSubobjectInfo - Find the sub-object that \p E points to. If it lives
+/// inside a struct, return the "size" and "offset" of that sub-object.
+static std::pair getSubobjectInfo(CodeGenFunction &CGF,
+  const Expr *E) {
+  const Expr *Subobject = SubobjectFinder().Visit(E);
+  if (!Subobject)
+return std::make_pair(0, 0);
+
+  const RecordDecl *OuterRD = nullptr;
+  const ValueDecl *VD = nullptr;
+
+  if (const auto *DRE = dyn_cast(Subobject)) {
+// We're pointing to the beginning of the struct.
+VD = DRE->getDecl();
+QualType Ty = VD->getType();
+if (Ty->isPointerType())
+  Ty = Ty->getPointeeType();
+OuterRD = Ty->getAsRecordDecl();

zygoloid wrote:

If I'm reading this correctly, I think this case is redundant: `getFieldInfo` 
only succeeds when `VD` is a field, but we're not going to have an evaluated 
`DeclRefExpr` that names a field. Can we return `0, 0` in this case, like we do 
for compound literals? I think the only case when we have non-zero values to 
return is when we've found a `FieldDecl`.

https://github.com/llvm/llvm-project/pull/83204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang] Correct __builtin_dynamic_object_size for subobject types (PR #83204)

2024-03-11 Thread Richard Smith via cfe-commits


@@ -1052,11 +1053,143 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const 
Expr *E, unsigned Type,
   return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, 
IsSigned));
 }
 
+namespace {
+
+/// SubobjectFinder - A simple visitor to find the "sub-object" pointed to by a
+/// __builtin_dynamic_object_size call. Information gathered from the
+/// sub-object is used by the back-end to determine the correct size when the
+/// 'TYPE' of the __bdos call has the least significant bit set (i.e. asking
+/// for the sub-object size).
+///
+/// The expectation is that we'll eventually hit one of three expression types:
+///
+///   1. DeclRefExpr - This is the expression for the base of the structure.
+///   2. MemberExpr - This is the field in the structure.
+///   3. CompoundLiteralExpr - This is for people who create something
+///  heretical like (struct foo has a flexible array member):
+///
+///(struct foo){ 1, 2 }.blah[idx];
+///
+/// All other expressions can be correctly handled with the current code.
+struct SubobjectFinder
+: public ConstStmtVisitor {
+  SubobjectFinder() = default;
+
+  
//======//
+  //Visitor Methods
+  
//======//
+
+  const Expr *VisitStmt(const Stmt *S) { return nullptr; }
+
+  const Expr *VisitDeclRefExpr(const DeclRefExpr *E) { return E; }
+  const Expr *VisitMemberExpr(const MemberExpr *E) { return E; }
+  const Expr *VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
+return E;
+  }
+
+  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+return Visit(E->getBase());
+  }
+  const Expr *VisitCastExpr(const CastExpr *E) {
+return Visit(E->getSubExpr());
+  }

zygoloid wrote:

A derived-to-base cast is a traversal to a subobject in C++, so we should 
presumably terminate the traversal when we reach one and use the offset and 
size of the base class as the subobject.

That'd be a pretty big delta from what you have here, but it'd be a good idea 
to add a FIXME here.

https://github.com/llvm/llvm-project/pull/83204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang] Correct __builtin_dynamic_object_size for subobject types (PR #83204)

2024-03-11 Thread Richard Smith via cfe-commits


@@ -26996,18 +26996,38 @@ class, structure, array, or other object.
 Arguments:
 ""
 
-The ``llvm.objectsize`` intrinsic takes four arguments. The first argument is a
-pointer to or into the ``object``. The second argument determines whether
-``llvm.objectsize`` returns 0 (if true) or -1 (if false) when the object size 
is
-unknown. The third argument controls how ``llvm.objectsize`` acts when ``null``
-in address space 0 is used as its pointer argument. If it's ``false``,
-``llvm.objectsize`` reports 0 bytes available when given ``null``. Otherwise, 
if
-the ``null`` is in a non-zero address space or if ``true`` is given for the
-third argument of ``llvm.objectsize``, we assume its size is unknown. The 
fourth
-argument to ``llvm.objectsize`` determines if the value should be evaluated at
-runtime.
+The ``llvm.objectsize`` intrinsic takes six arguments:
+
+- The first argument is a pointer to or into the ``object``.
+- The second argument controls which value to return when the size is unknown:
+
+  - If it's ``false``, ``llvm.objectsize`` returns ``-1``.
+  - If it's ``true``, ``llvm.objectsize`` returns ``0``.
+
+- The third argument controls how ``llvm.objectsize`` acts when ``null`` in
+  address space 0 is used as its pointer argument:
+
+  - If it's ``false``, ``llvm.objectsize`` reports 0 bytes available when given
+``null``.
+  - If it's ``true``, or the ``null`` pointer is in a non-zero address space,
+the size is assumed to be unknown.
+
+- The fourth argument to ``llvm.objectsize`` determines if the value should be
+  evaluated at runtime.
+- The fifth argument controls which size ``llvm.objectsize`` returns:
+
+  - If it's ``false``, ``llvm.objectsize`` returns the size of the closest
+surrounding subobject.
+  - If it's ``true``, ``llvm.objectsize`` returns the size of the whole object.
+
+- If non-zero, the sixth and seventh arguments encode the size and offset
+  information, respectively, of the original subobject's layout and is used
+  when the fifth argument is ``false``.
+- The seventh argument encodes the offset information of the original
+  subobject's layout and is used when the fifth argument is ``false``.

zygoloid wrote:

I think the information you're passing in here isn't quite what we'd want. If 
I'm reading the code correctly, the offset you're passing in is the field 
offset relative to the immediately-enclosing record type, which doesn't give us 
any information about either where the pointer is within the subobject, or 
where the subobject is within the complete object, so this doesn't seem like it 
can be enough information to produce a correct result.

Rather than passing in the offset of the subobject (relative to an unknown 
anchor point), I think it would be more useful to pass in a pointer to the 
start of the subobject. Or to pass in the offset from the start of the 
subobject to the pointer argument, but that would likely be harder for the 
frontend to calculate (eg, you'd need to work out the offset produced by array 
indexing).

https://github.com/llvm/llvm-project/pull/83204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Don't do casting of atomic FP loads/stores in FE. (PR #83446)

2024-03-11 Thread Jonas Paulsson via cfe-commits

JonPsson1 wrote:

> test the volatile is preserved too?

Tests added for 'atomic volatile' memory accesses as well.


https://github.com/llvm/llvm-project/pull/83446
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Don't do casting of atomic FP loads/stores in FE. (PR #83446)

2024-03-11 Thread Jonas Paulsson via cfe-commits

https://github.com/JonPsson1 updated 
https://github.com/llvm/llvm-project/pull/83446

>From 7abe41b453f5cd2b6ea4b566701531c2c2a73476 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson 
Date: Thu, 29 Feb 2024 14:16:57 +0100
Subject: [PATCH 1/2] Don't do casting of atomic FP loads/stores in FE.

---
 clang/lib/CodeGen/CGAtomic.cpp| 96 ---
 .../CodeGen/SystemZ/atomic_fp_load_store.c| 84 
 clang/test/CodeGen/atomic.c   |  3 +-
 clang/test/CodeGen/c11atomics-ios.c   |  8 +-
 clang/test/OpenMP/atomic_read_codegen.c   | 10 +-
 clang/test/OpenMP/atomic_write_codegen.c  | 13 +--
 6 files changed, 156 insertions(+), 58 deletions(-)
 create mode 100644 clang/test/CodeGen/SystemZ/atomic_fp_load_store.c

diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index a8d846b4f6a592..fb03d013e8afc7 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -194,12 +194,14 @@ namespace {
 RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot,
  SourceLocation loc, bool AsValue) const;
 
-/// Converts a rvalue to integer value.
-llvm::Value *convertRValueToInt(RValue RVal) const;
+llvm::Value *getScalarRValValueOrNull(RValue RVal) const;
 
-RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal,
- AggValueSlot ResultSlot,
- SourceLocation Loc, bool AsValue) const;
+/// Converts an rvalue to integer value if needed.
+llvm::Value *convertRValueToInt(RValue RVal, bool CastFP = true) const;
+
+RValue ConvertToValueOrAtomic(llvm::Value *IntVal, AggValueSlot ResultSlot,
+  SourceLocation Loc, bool AsValue,
+  bool CastFP = true) const;
 
 /// Copy an atomic r-value into atomic-layout memory.
 void emitCopyIntoMemory(RValue rvalue) const;
@@ -261,7 +263,8 @@ namespace {
 void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
llvm::AtomicOrdering AO, bool IsVolatile);
 /// Emits atomic load as LLVM instruction.
-llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
+llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile,
+  bool CastFP = true);
 /// Emits atomic compare-and-exchange op as a libcall.
 llvm::Value *EmitAtomicCompareExchangeLibcall(
 llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
@@ -1396,12 +1399,13 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address 
addr,
   LVal.getBaseInfo(), TBAAAccessInfo()));
 }
 
-RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
- AggValueSlot ResultSlot,
- SourceLocation Loc,
- bool AsValue) const {
+RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
+  AggValueSlot ResultSlot,
+  SourceLocation Loc, bool AsValue,
+  bool CastFP) const {
   // Try not to in some easy cases.
-  assert(IntVal->getType()->isIntegerTy() && "Expected integer value");
+  assert((Val->getType()->isIntegerTy() || Val->getType()->isIEEELikeFPTy()) &&
+ "Expected integer or floating point value");
   if (getEvaluationKind() == TEK_Scalar &&
   (((!LVal.isBitField() ||
  LVal.getBitFieldInfo().Size == ValueSizeInBits) &&
@@ -1410,13 +1414,14 @@ RValue 
AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
 auto *ValTy = AsValue
   ? CGF.ConvertTypeForMem(ValueTy)
   : getAtomicAddress().getElementType();
-if (ValTy->isIntegerTy()) {
-  assert(IntVal->getType() == ValTy && "Different integer types.");
-  return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy));
+if (ValTy->isIntegerTy() || (!CastFP && ValTy->isIEEELikeFPTy())) {
+  assert((!ValTy->isIntegerTy() || Val->getType() == ValTy) &&
+ "Different integer types.");
+  return RValue::get(CGF.EmitFromMemory(Val, ValueTy));
 } else if (ValTy->isPointerTy())
-  return RValue::get(CGF.Builder.CreateIntToPtr(IntVal, ValTy));
-else if (llvm::CastInst::isBitCastable(IntVal->getType(), ValTy))
-  return RValue::get(CGF.Builder.CreateBitCast(IntVal, ValTy));
+  return RValue::get(CGF.Builder.CreateIntToPtr(Val, ValTy));
+else if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
+  return RValue::get(CGF.Builder.CreateBitCast(Val, ValTy));
   }
 
   // Create a temporary.  This needs to be big enough to hold the
@@ -1433,8 +1438,7 @@ RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value 
*IntVal,
 
   // Slam the integer into the temporary.
   Address CastTemp = castToAtomicI

[clang] [clang-repl] Set up executor implicitly to account for init PTUs (PR #84758)

2024-03-11 Thread Vassil Vassilev via cfe-commits
Stefan =?utf-8?q?Gränitz?= 
Message-ID:
In-Reply-To: 



@@ -14,7 +14,7 @@ struct A { int a; A(int a) : a(a) {} virtual ~A(); };
 // PartialTranslationUnit.
 inline A::~A() { printf("~A(%d)\n", a); }
 
-// Create one instance with new and delete it.
+// Create one instance with new and delete it. We crash here now:
 A *a1 = new A(1);

vgvassilev wrote:

Yes, this is probably something worth fixing now. I could not see the stack trace on 
osx. Can you paste it here?

https://github.com/llvm/llvm-project/pull/84758
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang-tools-extra] [flang] [lld] [llvm] [flang][clang] Add Visibility specific help text for options (PR #81869)

2024-03-11 Thread Leandro Lupori via cfe-commits

luporl wrote:

> I'm a bit short on spare cycles for reviewing stuff :( @luporl , do you have 
> time to go over this? I'll bump this on my list regardless.

@banach-space, yes, I can go over this in more detail.

https://github.com/llvm/llvm-project/pull/81869
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Set and display CSA analysis entry points as notes on debugging (PR #84823)

2024-03-11 Thread Balazs Benics via cfe-commits

https://github.com/steakhal unassigned 
https://github.com/llvm/llvm-project/pull/84823
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Set and display CSA analysis entry points as notes on debugging (PR #84823)

2024-03-11 Thread Balazs Benics via cfe-commits

https://github.com/steakhal unassigned 
https://github.com/llvm/llvm-project/pull/84823
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Set and display CSA analysis entry points as notes on debugging (PR #84823)

2024-03-11 Thread Balazs Benics via cfe-commits

https://github.com/steakhal unassigned 
https://github.com/llvm/llvm-project/pull/84823
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Set and display CSA analysis entry points as notes on debugging (PR #84823)

2024-03-11 Thread Balazs Benics via cfe-commits

https://github.com/steakhal unassigned 
https://github.com/llvm/llvm-project/pull/84823
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Set and display CSA analysis entry points as notes on debugging (PR #84823)

2024-03-11 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-static-analyzer-1

Author: Balazs Benics (steakhal)


Changes

When debugging CSA issues, sometimes it would be useful to have a dedicated 
note for the analysis entry point, aka. the function name you would need to 
pass as "-analyze-function=XYZ" to reproduce a specific issue.
One way we use (or will use) this downstream is to provide tooling on top of 
creduce to supercharge productivity by automatically reducing cases on 
crashes, for example.

This will be added only if the "-analyzer-note-analysis-entry-points" is set or 
the "analyzer-display-progress" is on.

This additional entry point marker will be the first "note" if enabled, with 
the following message: "[invisible] analyzing from XYZ". They are prefixed by 
"[invisible]" to remind the CSA developer that this is only visible or meant to 
be visible for them.

CPP-5012

---

Patch is 20.79 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/84823.diff


11 Files Affected:

- (modified) clang/include/clang/Analysis/PathDiagnostic.h (+7-1) 
- (modified) clang/include/clang/Driver/Options.td (+3) 
- (modified) clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h (+5-4) 
- (modified) clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h 
(+12) 
- (modified) clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h 
(+2) 
- (modified) clang/lib/Analysis/PathDiagnostic.cpp (+5-2) 
- (modified) clang/lib/StaticAnalyzer/Core/BugReporter.cpp (+26-10) 
- (modified) clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp (+3-1) 
- (modified) clang/test/Analysis/analyzer-display-progress.cpp (+33-9) 
- (modified) clang/test/Analysis/analyzer-display-progress.m (+22-9) 
- (added) clang/test/Analysis/analyzer-note-analysis-entry-points.cpp (+75) 


```diff
diff --git a/clang/include/clang/Analysis/PathDiagnostic.h 
b/clang/include/clang/Analysis/PathDiagnostic.h
index 90559e7efb06f0..5907df022e449d 100644
--- a/clang/include/clang/Analysis/PathDiagnostic.h
+++ b/clang/include/clang/Analysis/PathDiagnostic.h
@@ -780,6 +780,9 @@ class PathDiagnostic : public llvm::FoldingSetNode {
   PathDiagnosticLocation UniqueingLoc;
   const Decl *UniqueingDecl;
 
+  /// The top-level entry point from which this issue was discovered.
+  const Decl *AnalysisEntryPoint = nullptr;
+
   /// Lines executed in the path.
   std::unique_ptr ExecutedLines;
 
@@ -788,7 +791,7 @@ class PathDiagnostic : public llvm::FoldingSetNode {
   PathDiagnostic(StringRef CheckerName, const Decl *DeclWithIssue,
  StringRef bugtype, StringRef verboseDesc, StringRef shortDesc,
  StringRef category, PathDiagnosticLocation LocationToUnique,
- const Decl *DeclToUnique,
+ const Decl *DeclToUnique, const Decl *AnalysisEntryPoint,
  std::unique_ptr ExecutedLines);
   ~PathDiagnostic();
 
@@ -852,6 +855,9 @@ class PathDiagnostic : public llvm::FoldingSetNode {
 return *ExecutedLines;
   }
 
+  /// Get the top-level entry point from which this issue was discovered.
+  const Decl *getAnalysisEntryPoint() const { return AnalysisEntryPoint; }
+
   /// Return the semantic context where an issue occurred.  If the
   /// issue occurs along a path, this represents the "central" area
   /// where the bug manifests.
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 5b3d366dbcf91b..55bfd1cc450809 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6656,6 +6656,9 @@ def analyzer_opt_analyze_headers : Flag<["-"], 
"analyzer-opt-analyze-headers">,
 def analyzer_display_progress : Flag<["-"], "analyzer-display-progress">,
   HelpText<"Emit verbose output about the analyzer's progress">,
   MarshallingInfoFlag>;
+def analyzer_note_analysis_entry_points : Flag<["-"], 
"analyzer-note-analysis-entry-points">,
+  HelpText<"Add a note for each bug report to denote their analysis entry 
points">,
+  MarshallingInfoFlag>;
 def analyze_function : Separate<["-"], "analyze-function">,
   HelpText<"Run analysis on specific function (for C++ include parameters in 
name)">,
   MarshallingInfoString>;
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h 
b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index 276d11e80a5b21..3a3c1a13d67dd5 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -227,6 +227,7 @@ class AnalyzerOptions : public 
RefCountedBase {
   unsigned ShouldEmitErrorsOnInvalidConfigValue : 1;
   unsigned AnalyzeAll : 1;
   unsigned AnalyzerDisplayProgress : 1;
+  unsigned AnalyzerNoteAnalysisEntryPoints : 1;
 
   unsigned eagerlyAssumeBinOpBifurcation : 1;
 
@@ -291,10 +292,10 @@ class AnalyzerOptions : public 
RefCountedBase {
 ShowCheckerOptionDeveloperList(false), ShowEnabledCheckerList(false),
 

[clang] [analyzer] Set and display CSA analysis entry points as notes on debugging (PR #84823)

2024-03-11 Thread Balazs Benics via cfe-commits

https://github.com/steakhal created 
https://github.com/llvm/llvm-project/pull/84823

When debugging CSA issues, sometimes it would be useful to have a dedicated 
note for the analysis entry point, aka. the function name you would need to 
pass as "-analyze-function=XYZ" to reproduce a specific issue.
One way we use (or will use) this downstream is to provide tooling on top of 
creduce that supercharges productivity by automatically reducing test cases, 
for example on crashes.

This note will be added only if "-analyzer-note-analysis-entry-points" is set 
or "-analyzer-display-progress" is on.

This additional entry point marker will be the first "note" if enabled, with 
the following message: "[invisible] analyzing from XYZ". They are prefixed by 
"[invisible]" to remind the CSA developer that this is only visible or meant to 
be visible for them.

CPP-5012

>From 94c6be96d92d8a25693ccbbffedf9edabfe79cc5 Mon Sep 17 00:00:00 2001
From: Balazs Benics 
Date: Mon, 11 Mar 2024 21:18:30 +0100
Subject: [PATCH] [analyzer] Set and display CSA analysis entry points as notes
 on debugging

When debugging CSA issues, sometimes it would be useful to have a
dedicated note for the analysis entry point, aka. the function name you
would need to pass as "-analyze-function=XYZ" to reproduce a specific
issue.
One way we use (or will use) this downstream is to provide tooling on
top of creduce that supercharges productivity by automatically reducing
test cases, for example on crashes.

This note will be added only if "-analyzer-note-analysis-entry-points"
is set or "-analyzer-display-progress" is on.

This additional entry point marker will be the first "note" if enabled,
with the following message: "[invisible] analyzing from XYZ".
They are prefixed by "[invisible]" to remind the CSA developer that this
is only visible or meant to be visible for them.

CPP-5012
---
 clang/include/clang/Analysis/PathDiagnostic.h |  8 +-
 clang/include/clang/Driver/Options.td |  3 +
 .../StaticAnalyzer/Core/AnalyzerOptions.h |  9 ++-
 .../Core/BugReporter/BugReporter.h| 12 +++
 .../Core/PathSensitive/ExprEngine.h   |  2 +
 clang/lib/Analysis/PathDiagnostic.cpp |  7 +-
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp | 36 ++---
 .../Frontend/AnalysisConsumer.cpp |  4 +-
 .../Analysis/analyzer-display-progress.cpp| 42 ---
 .../test/Analysis/analyzer-display-progress.m | 31 +---
 .../analyzer-note-analysis-entry-points.cpp   | 75 +++
 11 files changed, 193 insertions(+), 36 deletions(-)
 create mode 100644 clang/test/Analysis/analyzer-note-analysis-entry-points.cpp

diff --git a/clang/include/clang/Analysis/PathDiagnostic.h 
b/clang/include/clang/Analysis/PathDiagnostic.h
index 90559e7efb06f0..5907df022e449d 100644
--- a/clang/include/clang/Analysis/PathDiagnostic.h
+++ b/clang/include/clang/Analysis/PathDiagnostic.h
@@ -780,6 +780,9 @@ class PathDiagnostic : public llvm::FoldingSetNode {
   PathDiagnosticLocation UniqueingLoc;
   const Decl *UniqueingDecl;
 
+  /// The top-level entry point from which this issue was discovered.
+  const Decl *AnalysisEntryPoint = nullptr;
+
   /// Lines executed in the path.
   std::unique_ptr ExecutedLines;
 
@@ -788,7 +791,7 @@ class PathDiagnostic : public llvm::FoldingSetNode {
   PathDiagnostic(StringRef CheckerName, const Decl *DeclWithIssue,
  StringRef bugtype, StringRef verboseDesc, StringRef shortDesc,
  StringRef category, PathDiagnosticLocation LocationToUnique,
- const Decl *DeclToUnique,
+ const Decl *DeclToUnique, const Decl *AnalysisEntryPoint,
  std::unique_ptr ExecutedLines);
   ~PathDiagnostic();
 
@@ -852,6 +855,9 @@ class PathDiagnostic : public llvm::FoldingSetNode {
 return *ExecutedLines;
   }
 
+  /// Get the top-level entry point from which this issue was discovered.
+  const Decl *getAnalysisEntryPoint() const { return AnalysisEntryPoint; }
+
   /// Return the semantic context where an issue occurred.  If the
   /// issue occurs along a path, this represents the "central" area
   /// where the bug manifests.
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 5b3d366dbcf91b..55bfd1cc450809 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6656,6 +6656,9 @@ def analyzer_opt_analyze_headers : Flag<["-"], 
"analyzer-opt-analyze-headers">,
 def analyzer_display_progress : Flag<["-"], "analyzer-display-progress">,
   HelpText<"Emit verbose output about the analyzer's progress">,
   MarshallingInfoFlag>;
+def analyzer_note_analysis_entry_points : Flag<["-"], 
"analyzer-note-analysis-entry-points">,
+  HelpText<"Add a note for each bug report to denote their analysis entry 
points">,
+  MarshallingInfoFlag>;
 def analyze_function : Separate<["-"], "analyze-function">,
   HelpText<"Run analysis on specific function (for C++ incl

[clang] [clang][CodeGen] Allow memcpy replace with trivial auto var init (PR #84230)

2024-03-11 Thread Eli Friedman via cfe-commits

https://github.com/efriedma-quic commented:

This is clearly not what we want long-term.  Either clang or LLVM should have 
better heuristics based on the actual values being stored; the syntax used at 
the C level shouldn't affect codegen.

That said, reverting the change with a carveout for the trivial var init is 
probably reasonable as a temporary step, if we want something we can 
cherry-pick for 18.x.

Please add a better comment explaining the history here; the current comment 
isn't really sufficient to explain why we're using heuristics which don't 
really make sense.

https://github.com/llvm/llvm-project/pull/84230
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Fix warning message when using negative complex range options. (PR #84567)

2024-03-11 Thread Michael Toguchi via cfe-commits

https://github.com/mdtoguchi approved this pull request.


https://github.com/llvm/llvm-project/pull/84567
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Draft/wip/preview of using leaf constructs in clang (PR #84817)

2024-03-11 Thread Alexey Bataev via cfe-commits


@@ -2975,9 +2991,11 @@ StmtResult 
Parser::ParseOpenMPDeclarativeOrExecutableDirective(
   /*isStmtExpr=*/false));
   AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses);
 }
-Directive = Actions.ActOnOpenMPExecutableDirective(
-DKind, DirName, CancelRegion, Clauses, AssociatedStmt.get(), Loc,
-EndLoc);
+if (!isCombinedConstruct(DKind)) {
+  Directive = Actions.ActOnOpenMPExecutableDirective(
+  DKind, DirName, CancelRegion, Clauses, AssociatedStmt.get(), Loc,
+  EndLoc);
+}

alexey-bataev wrote:

It would be good to somehow merge this stuff

https://github.com/llvm/llvm-project/pull/84817
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Draft/wip/preview of using leaf constructs in clang (PR #84817)

2024-03-11 Thread Alexey Bataev via cfe-commits


@@ -2966,7 +2972,17 @@ StmtResult 
Parser::ParseOpenMPDeclarativeOrExecutableDirective(
 getLangOpts().OpenMPIRBuilder)
   AssociatedStmt = Actions.ActOnOpenMPLoopnest(AssociatedStmt.get());
   }
-  AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses);
+  if (!Leaves.empty()) {
+size_t i = Leaves.size() - 1;
+AssociatedStmt = Actions.ActOnOpenMPRegionEnd(
+AssociatedStmt, Clauses, [&](StmtResult S) {
+  return Actions.ActOnOpenMPExecutableDirective(
+  Leaves[i--], DirName, CancelRegion, Clauses, S.get(),
+  Loc, EndLoc);
+});

alexey-bataev wrote:

Can we somehow avoid callback here?

https://github.com/llvm/llvm-project/pull/84817
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][DebugInfo] Use CGDebugInfo::createFile in CGDebugInfo::CreateCompileUnit (#83174) (PR #83175)

2024-03-11 Thread Aleksei Vetrov via cfe-commits

noxwell wrote:

Closing this PR, because these changes are incompatible with the DWARF standard, see 
[83175#issuecomment-1969473992](https://github.com/llvm/llvm-project/pull/83175#issuecomment-1969473992)

https://github.com/llvm/llvm-project/pull/83175
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][DebugInfo] Use CGDebugInfo::createFile in CGDebugInfo::CreateCompileUnit (#83174) (PR #83175)

2024-03-11 Thread Aleksei Vetrov via cfe-commits

https://github.com/noxwell closed 
https://github.com/llvm/llvm-project/pull/83175
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Add optional pass to remove UBSAN traps using PGO (PR #84214)

2024-03-11 Thread via cfe-commits

dyung wrote:

@vitalybuka the test you added `remote-traps.c` seems to be failing on a few 
buildbots, can you take a look?

- https://lab.llvm.org/buildbot/#/builders/231/builds/21492
- https://lab.llvm.org/buildbot/#/builders/280/builds/6

https://github.com/llvm/llvm-project/pull/84214
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HLSL] Implement `rsqrt` intrinsic (PR #84820)

2024-03-11 Thread Farzon Lotfi via cfe-commits

https://github.com/farzonl updated 
https://github.com/llvm/llvm-project/pull/84820

>From a46ecdee6356e744a80f3c29748e7c3482a89760 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi 
Date: Mon, 11 Mar 2024 15:17:35 -0400
Subject: [PATCH] [HLSL] Implement `rsqrt` intrinsic This change implements
 #70074 - `hlsl_intrinsics.h` - add the rsqrt api - `DXIL.td` add the llvm
 intrinsic to DXIL op lowering map. - `Builtins.td` - add an hlsl builtin for
 rsqrt. - `CGBuiltin.cpp` add the ir generation for the rsqrt intrinsic. -
 `SemaChecking.cpp` - reuse the one arg float only checks. -
 `IntrinsicsDirectX.td` - add an `rsqrt` intrinsic.

---
 clang/include/clang/Basic/Builtins.td |  6 +++
 clang/lib/CodeGen/CGBuiltin.cpp   |  8 +++
 clang/lib/Headers/hlsl/hlsl_intrinsics.h  | 32 +++
 clang/lib/Sema/SemaChecking.cpp   |  1 +
 clang/test/CodeGenHLSL/builtins/rsqrt.hlsl| 53 +++
 clang/test/SemaHLSL/BuiltIns/dot-warning.ll   | 49 +
 .../test/SemaHLSL/BuiltIns/rsqrt-errors.hlsl  | 27 ++
 llvm/include/llvm/IR/IntrinsicsDirectX.td |  1 +
 llvm/lib/Target/DirectX/DXIL.td   |  3 ++
 llvm/test/CodeGen/DirectX/rsqrt.ll| 31 +++
 10 files changed, 211 insertions(+)
 create mode 100644 clang/test/CodeGenHLSL/builtins/rsqrt.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/dot-warning.ll
 create mode 100644 clang/test/SemaHLSL/BuiltIns/rsqrt-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/rsqrt.ll

diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 9c703377ca8d3e..de0cfb4e46b8bd 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4590,6 +4590,12 @@ def HLSLRcp : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLRSqrt : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_rsqrt"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 20c35757939152..d2c83a5e405f42 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18077,6 +18077,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
 /*ReturnType=*/Op0->getType(), Intrinsic::dx_rcp,
 ArrayRef{Op0}, nullptr, "dx.rcp");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
+Value *Op0 = EmitScalarExpr(E->getArg(0));
+if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+  llvm_unreachable("rsqrt operand must have a float representation");
+return Builder.CreateIntrinsic(
+/*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
+ArrayRef{Op0}, nullptr, "dx.rsqrt");
+  }
   }
   return nullptr;
 }
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h 
b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 45f8544392584e..71238a4f268ede 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1153,6 +1153,38 @@ double3 rcp(double3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
 double4 rcp(double4);
 
+//===--===//
+// rsqrt builtins
+//===--===//
+
+/// \fn T rsqrt(T x)
+/// \brief Returns the reciprocal of the square root of the specified value \a
+/// x. \param x The specified input value.
+///
+/// This function uses the following formula: 1 / sqrt(x).
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half rsqrt(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half2 rsqrt(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half3 rsqrt(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+half4 rsqrt(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float rsqrt(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float2 rsqrt(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float3 rsqrt(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rsqrt)
+float4 rsqrt(float4);
+
 
//===--===//
 // round builtins
 
//===--===//
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a5f42b630c3fa2..0dafff47ab4040 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5285,6 +5285,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall

  1   2   3   4   5   >