[PATCH] D158641: [AArch64] Fix FMV ifunc resolver usage on old Android APIs. Rename internal compiler-rt FMV functions.

2023-09-29 Thread Pavel Iliin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG8ec50d64464c: [AArch64] Fix FMV ifunc resolver usage on old 
Android APIs. Rename internal… (authored by ilinpv).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158641/new/

https://reviews.llvm.org/D158641

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGen/attr-target-clones-aarch64.c
  clang/test/CodeGen/attr-target-version.c
  clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
  clang/test/CodeGenCXX/attr-target-version.cpp
  clang/test/Driver/aarch64-features.c
  compiler-rt/lib/builtins/cpu_model.c

Index: compiler-rt/lib/builtins/cpu_model.c
===
--- compiler-rt/lib/builtins/cpu_model.c
+++ compiler-rt/lib/builtins/cpu_model.c
@@ -1239,13 +1239,12 @@
   // As features grows new fields could be added
 } __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
 
-void init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) {
+static void __init_cpu_features_constructor(unsigned long hwcap,
+const __ifunc_arg_t *arg) {
 #define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
 #define extractBits(val, start, number)\
   (val & ((1ULL << number) - 1ULL) << start) >> start
-  if (__aarch64_cpu_features.features)
-return;
   unsigned long hwcap2 = 0;
   if (hwcap & _IFUNC_ARG_HWCAP)
 hwcap2 = arg->_hwcap2;
@@ -1427,7 +1426,24 @@
   setCPUFeature(FEAT_MAX);
 }
 
-void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) {
+void __init_cpu_features_resolver(unsigned long hwcap,
+  const __ifunc_arg_t *arg) {
+  if (__aarch64_cpu_features.features)
+return;
+#if defined(__ANDROID__)
+  // ifunc resolvers don't have hwcaps in arguments on Android API lower
+  // than 30. If so, set feature detection done and keep all CPU features
+  // unsupported (zeros). To detect this case in runtime we check existence
+  // of memfd_create function from Standard C library which was introduced in
+  // Android API 30.
+  int memfd_create(const char *, unsigned int) __attribute__((weak));
+  if (!memfd_create)
+return;
+#endif // defined(__ANDROID__)
+  __init_cpu_features_constructor(hwcap, arg);
+}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
   unsigned long hwcap;
   unsigned long hwcap2;
   // CPU features already initialized.
@@ -1452,7 +1468,7 @@
   arg._size = sizeof(__ifunc_arg_t);
   arg._hwcap = hwcap;
   arg._hwcap2 = hwcap2;
-  init_cpu_features_resolver(hwcap | _IFUNC_ARG_HWCAP, &arg);
+  __init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
 #undef extractBits
 #undef getCPUFeature
 #undef setCPUFeature
Index: clang/test/Driver/aarch64-features.c
===
--- clang/test/Driver/aarch64-features.c
+++ clang/test/Driver/aarch64-features.c
@@ -7,15 +7,17 @@
 // CHECK: fno-signed-char
 
 // Check Function Multi Versioning option and rtlib dependency.
-// RUN: %clang --target=aarch64-linux-android -rtlib=compiler-rt \
+// RUN: %clang --target=aarch64-linux-android23 -rtlib=compiler-rt \
 // RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV %s
-
+// RUN: %clang --target=aarch64-linux-android -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
 // RUN: %clang --target=aarch64-linux-android -rtlib=compiler-rt -mno-fmv \
 // RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
+// RUN: %clang --target=aarch64-linux-android22 -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
 
 // RUN: %clang --target=aarch64-linux-gnu -rtlib=libgcc \
 // RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
-
 // RUN: %clang --target=arm64-unknown-linux -rtlib=libgcc \
 // RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
 
Index: clang/test/CodeGenCXX/attr-target-version.cpp
===
--- clang/test/CodeGenCXX/attr-target-version.cpp
+++ clang/test/CodeGenCXX/attr-target-version.cpp
@@ -78,7 +78,7 @@
 // CHECK-NEXT:ret i32 [[ADD3]]
 // CHECK-LABEL: @_ZN7MyClass3gooEi.resolver(
 // CHECK-NEXT:  resolver_entry:
-// CHECK-NEXT:call void @init_cpu_features_resolver()
+// CHECK-NEXT:call void @__init_cpu_features_resolver()
 // CHECK-NEXT:[[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
 // CHECK-NEXT:[[TMP1:%.*]] = and i64 [[TMP0]], 1024
 // CHECK-NEXT:[[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1024
@@ -98,7 +98,7 @@
 // CHECK-NEXT:ret ptr @_ZN7MyClass3gooEi
 // CHECK-LABEL: @_Z3fooi.resolver(
 // CHECK-NEXT:  resolver_entry:
-// CHECK-NEXT:call void 

[PATCH] D158641: [AArch64] Fix FMV ifunc resolver usage on old Android APIs. Rename internal compiler-rt FMV functions.

2023-09-25 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

The patch was tested with NDK r26 by building a simple Function Multi Versioning 
project and running it on Android API 25, 29, 30 and 33 - it works fine.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158641/new/

https://reviews.llvm.org/D158641

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158641: [AArch64] Fix FMV ifunc resolver usage on old Android APIs. Rename internal compiler-rt FMV functions.

2023-09-25 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 557332.
ilinpv retitled this revision from "[AArch64][Android][DRAFT] Fix FMV ifunc 
resolver usage on old Android APIs." to "[AArch64] Fix FMV ifunc resolver usage 
on old Android APIs. Rename internal compiler-rt FMV functions.".
ilinpv edited the summary of this revision.
ilinpv added a comment.
Herald added a subscriber: dberris.

Use weak "memfd_create" to check for API 30, split and rename init cpu features 
functions ( __init_cpu_features, __init_cpu_features_resolver, 
__init_cpu_features_constructor )


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158641/new/

https://reviews.llvm.org/D158641

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGen/attr-target-clones-aarch64.c
  clang/test/CodeGen/attr-target-version.c
  clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
  clang/test/CodeGenCXX/attr-target-version.cpp
  clang/test/Driver/aarch64-features.c
  compiler-rt/lib/builtins/cpu_model.c

Index: compiler-rt/lib/builtins/cpu_model.c
===
--- compiler-rt/lib/builtins/cpu_model.c
+++ compiler-rt/lib/builtins/cpu_model.c
@@ -1186,13 +1186,12 @@
   // As features grows new fields could be added
 } __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
 
-void init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) {
+static void __init_cpu_features_constructor(unsigned long hwcap,
+const __ifunc_arg_t *arg) {
 #define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
 #define extractBits(val, start, number)\
   (val & ((1ULL << number) - 1ULL) << start) >> start
-  if (__aarch64_cpu_features.features)
-return;
   unsigned long hwcap2 = 0;
   if (hwcap & _IFUNC_ARG_HWCAP)
 hwcap2 = arg->_hwcap2;
@@ -1374,7 +1373,24 @@
   setCPUFeature(FEAT_MAX);
 }
 
-void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) {
+void __init_cpu_features_resolver(unsigned long hwcap,
+  const __ifunc_arg_t *arg) {
+  if (__aarch64_cpu_features.features)
+return;
+#if defined(__ANDROID__)
+  // ifunc resolvers don't have hwcaps in arguments on Android API lower
+  // than 30. If so, set feature detection done and keep all CPU features
+  // unsupported (zeros). To detect this case in runtime we check existence
+  // of memfd_create function from Standard C library which was introduced in
+  // Android API 30.
+  int memfd_create(const char *, unsigned int) __attribute__((weak));
+  if (!memfd_create)
+return;
+#endif // defined(__ANDROID__)
+  __init_cpu_features_constructor(hwcap, arg);
+}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
   unsigned long hwcap;
   unsigned long hwcap2;
   // CPU features already initialized.
@@ -1399,7 +1415,7 @@
   arg._size = sizeof(__ifunc_arg_t);
   arg._hwcap = hwcap;
   arg._hwcap2 = hwcap2;
-  init_cpu_features_resolver(hwcap | _IFUNC_ARG_HWCAP, &arg);
+  __init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
 #undef extractBits
 #undef getCPUFeature
 #undef setCPUFeature
Index: clang/test/Driver/aarch64-features.c
===
--- clang/test/Driver/aarch64-features.c
+++ clang/test/Driver/aarch64-features.c
@@ -7,15 +7,17 @@
 // CHECK: fno-signed-char
 
 // Check Function Multi Versioning option and rtlib dependency.
-// RUN: %clang --target=aarch64-linux-android -rtlib=compiler-rt \
+// RUN: %clang --target=aarch64-linux-android23 -rtlib=compiler-rt \
 // RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV %s
-
+// RUN: %clang --target=aarch64-linux-android -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
 // RUN: %clang --target=aarch64-linux-android -rtlib=compiler-rt -mno-fmv \
 // RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
+// RUN: %clang --target=aarch64-linux-android22 -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
 
 // RUN: %clang --target=aarch64-linux-gnu -rtlib=libgcc \
 // RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
-
 // RUN: %clang --target=arm64-unknown-linux -rtlib=libgcc \
 // RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FMV-OFF %s
 
Index: clang/test/CodeGenCXX/attr-target-version.cpp
===
--- clang/test/CodeGenCXX/attr-target-version.cpp
+++ clang/test/CodeGenCXX/attr-target-version.cpp
@@ -78,7 +78,7 @@
 // CHECK-NEXT:ret i32 [[ADD3]]
 // CHECK-LABEL: @_ZN7MyClass3gooEi.resolver(
 // CHECK-NEXT:  resolver_entry:
-// CHECK-NEXT:call void @init_cpu_features_resolver()
+// CHECK-NEXT:call void @__init_cpu_features_resolver()
 // CHECK-NEXT:[[TMP0:%.*]] 

[PATCH] D158641: [AArch64][Android][DRAFT] Fix FMV ifunc resolver usage on old Android APIs.

2023-09-08 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: compiler-rt/lib/builtins/cpu_model.c:1382
+return;
+#if defined(__ANDROID__)
+  // ifunc resolvers don't have hwcaps in arguments on Android API lower

enh wrote:
> rprichard wrote:
> > enh wrote:
> > > srhines wrote:
> > > > MaskRay wrote:
> > > > > enh wrote:
> > > > > > ilinpv wrote:
> > > > > > > MaskRay wrote:
> > > > > > > > I am unfamiliar with how Android ndk builds compiler-rt.
> > > > > > > > 
> > > > > > > > If `__ANDROID_API__ >= 30`, shall we use the regular Linux code 
> > > > > > > > path?
> > > > > > > I think that leads to shipping different compile-rt libraries 
> > > > > > > depend on ANDROID_API. If this is an option to consider than 
> > > > > > > runtime check android_get_device_api_level() < 30 can be replaced 
> > > > > > > by `__ANDROID_API__ < 30`
> > > > > > depends what you mean... in 10 years or so, yes, no-one is likely 
> > > > > > to still care about the older API levels and we can just delete 
> > > > > > this. but until then, no, there's _one_ copy of compiler-rt that 
> > > > > > everyone uses, and although _OS developers_ don't need to support 
> > > > > > anything more than a couple of years old, most app developers are 
> > > > > > targeting far lower API levels than that (to maximize the number of 
> > > > > > possible customers).
> > > > > > 
> > > > > > TL;DR: "you could add that condition to the `#if`, but no-one would 
> > > > > > use it for a decade". (and i think the comment and `if` below 
> > > > > > should make it clear enough to future archeologists when this code 
> > > > > > block can be removed :-) )
> > > > > My thought was that people build Android with a specific 
> > > > > `__ANDROID_API__`, and only systems >= this level are supported.
> > > > > ```
> > > > > #If __ANDROID_API__ < 30
> > > > > ...
> > > > > #endif
> > > > > ```
> > > > > 
> > > > > This code has a greater chance to be removed when it becomes 
> > > > > obsoleted. The argument is similar to how we find obsoleted GCC 
> > > > > workarounds.
> > > > Yes, the NDK currently just ships the oldest supported target API level 
> > > > for compiler-rt, while the Android platform build does have access to 
> > > > both the oldest supported target API level + the most recent target API 
> > > > level, so that we can make better use of features there.
> > > > 
> > > > Maybe I'm missing something, but it's feeling like the NDK users won't 
> > > > be able to make great use of FMV without us either bumping the minimum 
> > > > level or shipping multiple runtimes (and then using the #ifdefs 
> > > > properly here).
> > > > Maybe I'm missing something, but it's feeling like the NDK users won't 
> > > > be able to make great use of FMV without us either bumping the minimum 
> > > > level or shipping multiple runtimes (and then using the #ifdefs 
> > > > properly here).
> > > 
> > > yeah, that's the point of this code --- it's a runtime check so the NDK 
> > > "just works".
> > > 
> > > but if people want the `__ANDROID_API__` `#if` too, that's fine. (and, 
> > > like you say, the platform's two variants mean that we'll be testing both 
> > > code paths, so i'm not worried about "one of these is the only one that 
> > > anyone's actually building" problem.)
> > > 
> > > i have no opinion on whether anyone llvm is more/less likely to 
> > > understand if/when `if (android_get_device_api_level() < 30)` versus `#if 
> > > __ANDROID_API__ < 30` can be deleted.
> > > 
> > > i think the best argument for leaving this change as-is would be "anyone 
> > > building their own is less likely to screw up" (since developers struggle 
> > > all the time with the difference between "target" and "min" api, because 
> > > the ndk terminology is different to the AndroidManifest.xml stuff that 
> > > developers are more familiar with, which causes confusion). so if this 
> > > was in libc++ (where i know people do build their own), i'd argue for the 
> > > code as-is. but since it's compiler-rt (and i'm not aware that anyone's 
> > > building that themselves) i don't think it matters either way?
> > > 
> > > to be clear, i'm imagining:
> > > ```
> > > #if __ANDROID_API__ < 30
> > >   if (android_get_device_api_level() < 30) {
> > > setCPUFeature(FEAT_MAX);
> > > return;
> > >   }
> > > #endif
> > > ```
> > > (which brings us back to the "this is confusing" --- _just_ having the 
> > > `#if` would be subtly different, which is why if i'd written this, i'd 
> > > have written it as-is too!)
> > Unless I'm missing something, calling android_get_device_api_level doesn't 
> > work, because (a) the loader hasn't performed the necessary relocations and 
> > (b) that API reads system properties, which haven't been initialized yet.
> > 
> > Maybe the device API could/should be exported to a /dev file, which is how 
> > we exported the CPU variant to ifunc resolvers.
> > 
> > We could redesign Bionic so that an ifunc resolver can call 
> > 

[PATCH] D158641: [AArch64][Android][DRAFT] Fix FMV ifunc resolver usage on old Android APIs.

2023-09-08 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: compiler-rt/lib/builtins/cpu_model.c:1379
 
+void init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) 
{
+  if (__aarch64_cpu_features.features)

ilinpv wrote:
> MaskRay wrote:
> > It seems that we don't need the `_constructor` function. We can just move 
> > the 
> > 
> > ```
> > #if defined(__ANDROID__)
> >   // ifunc resolvers don't have hwcaps in arguments on Android API lower
> >   // than 30. In this case set detection done and keep all CPU features
> >   // unsupported (zeros).
> >   if (android_get_device_api_level() < 30) {
> > setCPUFeature(FEAT_MAX);
> > return;
> >   }
> > ```
> > 
> > logic to init_cpu_features_resolver
> We don't need this "Android API check and return" in 
> init_cpu_features_resolver when it is called from constructor 
> ##init_cpu_features## where hwcaps are obtained through getauxval calls.
It seems there is no other way than to split it into ##_constructor## and 
##_resolver## parts.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158641/new/

https://reviews.llvm.org/D158641

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158641: [AArch64][Android][DRAFT] Fix FMV ifunc resolver usage on old Android APIs.

2023-09-08 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: compiler-rt/lib/builtins/cpu_model.c:1379
 
+void init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) 
{
+  if (__aarch64_cpu_features.features)

MaskRay wrote:
> It seems that we don't need the `_constructor` function. We can just move the 
> 
> ```
> #if defined(__ANDROID__)
>   // ifunc resolvers don't have hwcaps in arguments on Android API lower
>   // than 30. In this case set detection done and keep all CPU features
>   // unsupported (zeros).
>   if (android_get_device_api_level() < 30) {
> setCPUFeature(FEAT_MAX);
> return;
>   }
> ```
> 
> logic to init_cpu_features_resolver
We don't need this "Android API check and return" in init_cpu_features_resolver 
when it is called from constructor ##init_cpu_features## where hwcaps are 
obtained through getauxval calls.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158641/new/

https://reviews.llvm.org/D158641

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150867: [AArch64][FMV] Prevent target attribute using for multiversioning.

2023-09-06 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: clang/lib/Sema/SemaDecl.cpp:11544
+  // Target attribute on AArch64 is not used for multiversioning
+  if (NewTA && S.getASTContext().getTargetInfo().getTriple().isAArch64())
+return false;

Allen wrote:
> I find the attribute **target_clones/target_version** also not be used to 
> generate multiversioning function? https://godbolt.org/z/cYWsbrPn9
> so it is not support on AArch64 ?
Function multiversioning depends on compiler-rt library, specify 
--rtlib=compiler-rt to make it work.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150867/new/

https://reviews.llvm.org/D150867

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158963: [CodeGen] Function multi-versioning: don't set comdat for internal linkage resolvers

2023-09-04 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: clang/docs/ReleaseNotes.rst:208
   (`#61334 <https://github.com/llvm/llvm-project/issues/61334>`_)
+- For function multi-versioning using the ``target`` or ``target_clones``
+  attributes, remove comdat for internal linkage functions.

If I read the code right it applies for function multi-versioning using 
`target_version` and `target_clones` attributes ( AArch64 ) as well, doesn't it?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158963/new/

https://reviews.llvm.org/D158963

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D159398: [AArch64][Clang] Disable outline atomics in freestanding env

2023-09-04 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

> Offtopic: Outlining atomics seems to be very CPU specific thing. In my 
> experience LSE were ~= old exclusive semantics. So adding extra call + extra 
> bit check (too bad IFUNCs are not used :)) each time it would be executed 
> seems to be quite an extra load (for CPU, TLB, dcache..), so I'm not sure 
> that outline atomics is a win-win thing (at least on some of the CPUs). This 
> is absolutely not a case for this patch anyway, just some of my thoughts, I 
> would be glad to hear other opinions :)

Outline atomics overhead is mostly negligible. "Various members in the Arm 
ecosystem have measured the performance impact of this indirection on a diverse 
set of systems and we were happy to find out that it was minimal compared to 
the benefit of using the LSE instructions for better scalability at large core 
counts." 
https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
For IFUNCs Function Multi Versioning 
https://github.com/ARM-software/acle/blob/main/main/acle.md#function-multi-versioning
 ##lse## feature can be used.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D159398/new/

https://reviews.llvm.org/D159398

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D159398: [AArch64][Clang] Disable outline atomics in freestanding env

2023-09-04 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

> I understand that you could disable it with extra option and that for now it 
> would be different with gcc, but it looks debatable to me that such a 
> behaviour in gcc is correct and expected, maybe someone need to change it 
> there too. (Please keep in mind that I might be wrong with my position and 
> during discussion please consider this patch as NFC so we could discuss here 
> am I right or not :)). My point is that the gcc is not a golden standard in 
> this questions too and in my mind such a behaviour (disabling extra 
> dependencies in freestanding mode without passing extra flags) would be 
> expected the very same way the -fno-builtint get's auto implied with 
> -ffreestanding flag passed.

I think we are on the same page here. I am not a freestanding mode user and 
respect opinions of real users, but we need to keep compilers aligned, so to 
make such changes we need GCC community consensus as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D159398/new/

https://reviews.llvm.org/D159398

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D159398: [AArch64][Clang] Disable outline atomics in freestanding env

2023-09-04 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

Outline atomics depend on runtime library availability ( libgcc or 
compiler-rt ). If no suitable library is available they will be disabled. 
So if in freestanding mode the compiler does not depend on a runtime library, you 
can remove it and get rid of outline atomics calls automatically.
If you do have a runtime library in freestanding mode, you can disable outline 
atomics by specifying the ##-mno-outline-atomics## option. But disabling them just 
via the ##-ffreestanding## option would create a divergence from GCC behaviour, 
which does not disable outline atomics in this case.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D159398/new/

https://reviews.llvm.org/D159398

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D159398: [AArch64][Clang] Disable outline atomics in freestanding env

2023-09-04 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

From what I understand, in a freestanding environment the runtime library is not 
used, and the clang driver can detect this situation and set outline atomics off by 
default ( see ##clang/lib/Driver/ToolChains/Linux.cpp## 
##Linux::IsAArch64OutlineAtomicsDefault## -> ##GetRuntimeLibType## )


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D159398/new/

https://reviews.llvm.org/D159398

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D159174: [Clang] Use stable_sort in AppendTargetMangling

2023-09-04 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D159174#4635886, @BeMg wrote:

> I'm working on RISC-V FMV support, and we found the large set of extension 
> features is hard to maintain the priority that doesn't collision at all.

On AArch64 for FMV we are using ##target_version## attribute and 
##AppendTargetVersionMangling## with llvm::stable_sort


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D159174/new/

https://reviews.llvm.org/D159174

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158641: [AArch64][Android][DRAFT] Fix FMV ifunc resolver usage on old Android APIs.

2023-08-24 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: compiler-rt/lib/builtins/cpu_model.c:1382
+return;
+#if defined(__ANDROID__)
+  // ifunc resolvers don't have hwcaps in arguments on Android API lower

MaskRay wrote:
> I am unfamiliar with how Android ndk builds compiler-rt.
> 
> If `__ANDROID_API__ >= 30`, shall we use the regular Linux code path?
I think that leads to shipping different compiler-rt libraries depending on 
ANDROID_API. If this is an option to consider, then the runtime check 
android_get_device_api_level() < 30 can be replaced by `__ANDROID_API__ < 30`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158641/new/

https://reviews.llvm.org/D158641

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158641: [AArch64][Android][DRAFT] Fix FMV ifunc resolver usage on old Android APIs.

2023-08-23 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added reviewers: srhines, danielkiss, enh, MaskRay, rprichard.
Herald added subscribers: Enna1, kristof.beyls, krytarowski.
Herald added a project: All.
ilinpv requested review of this revision.
Herald added projects: clang, Sanitizers.
Herald added subscribers: Sanitizers, cfe-commits.

The patch tries to fix Function Multi Versioning features detection in ifunc
resolver on Android API levels < 30.

Ifunc hwcaps parameters are not supported on Android API levels 23-29,
so all CPU features are set unsupported if they were not initialized
before ifunc resolver call.

There is no support for ifunc on Android API levels < 23, so Function
Multi Versioning is disabled in this case.

However, applying this patch to android-ndk-r26-beta2 and using it to build a 
simple app with FMV and feature detection leads to a crash when the FMV ifunc 
resolver is called. I suspect the issue could be in the Android API runtime check 
called from the ifunc resolver: ifunc_resolver -> init_cpu_features_resolver -> 
android_get_device_api_level(__system_property_get).
Part of crash stack trace ( thanks to Lingkai.Dong ) :

  08-23 15:37:16.824  4641  4641 F DEBUG   : backtrace:
  08-23 15:37:16.824  4641  4641 F DEBUG   :   #00 pc 1af0  

  08-23 15:37:16.824  4641  4641 F DEBUG   :   #01 pc 15b8  
/data/app/~~dMaXGzJksZog7lGF_CEMHQ==/com.arm.v9.demo-C74mlVTkH7NumAtbKGo36A==/base.apk!libdemo.so
 (offset 0x3e9000) (BuildId: 91106fcefd2e1eb3f7567f0c9b297d48ebfdf452)
  08-23 15:37:16.824  4641  4641 F DEBUG   :   #02 pc 10e8  
/data/app/~~dMaXGzJksZog7lGF_CEMHQ==/com.arm.v9.demo-C74mlVTkH7NumAtbKGo36A==/base.apk!libdemo.so
 (offset 0x3e9000) (BuildId: 91106fcefd2e1eb3f7567f0c9b297d48ebfdf452)
  08-23 15:37:16.824  4641  4641 F DEBUG   :   #03 pc 0004c2f0  
/apex/com.android.runtime/bin/linker64 (__dl__Z19call_ifunc_resolvery+36) 
(BuildId: 87cff915f050a1eab12b7d6dd7c25a63)
  08-23 15:37:16.824  4641  4641 F DEBUG   :   #04 pc 0005dbd8  
/apex/com.android.runtime/bin/linker64 
(__dl__ZL26process_relocation_generalR9RelocatorRK10elf64_rela.__uniq.153370809355997480299804515629147722701+1500)
 (BuildId: 87cff915f050a1eab12b7d6dd7c25a63)

I would appreciate any ideas what could be wrong in the patch, further 
confirmation if android_get_device_api_level should work from ifunc_resolver, 
or tips for ifunc_resolver debugging on Android :)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D158641

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/aarch64-features.c
  compiler-rt/lib/builtins/cpu_model.c


Index: compiler-rt/lib/builtins/cpu_model.c
===
--- compiler-rt/lib/builtins/cpu_model.c
+++ compiler-rt/lib/builtins/cpu_model.c
@@ -894,6 +894,7 @@
 #include 
 
 #if defined(__ANDROID__)
+#include <android/api-level.h>
 #include <string.h>
 #include <sys/system_properties.h>
 #elif defined(__Fuchsia__)
@@ -1186,7 +1187,8 @@
   // As features grows new fields could be added
 } __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
 
-void init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) 
{
+static void init_cpu_features_constructor(unsigned long hwcap,
+  const __ifunc_arg_t *arg) {
 #define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
 #define extractBits(val, start, number)
\
@@ -1374,6 +1376,21 @@
   setCPUFeature(FEAT_MAX);
 }
 
+void init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) 
{
+  if (__aarch64_cpu_features.features)
+return;
+#if defined(__ANDROID__)
+  // ifunc resolvers don't have hwcaps in arguments on Android API lower
+  // than 30. In this case set detection done and keep all CPU features
+  // unsupported (zeros).
+  if (android_get_device_api_level() < 30) {
+setCPUFeature(FEAT_MAX);
+return;
+  }
+#endif // defined(__ANDROID__)
+  init_cpu_features_constructor(hwcap, arg);
+}
+
 void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) {
   unsigned long hwcap;
   unsigned long hwcap2;
@@ -1399,7 +1416,7 @@
   arg._size = sizeof(__ifunc_arg_t);
   arg._hwcap = hwcap;
   arg._hwcap2 = hwcap2;
-  init_cpu_features_resolver(hwcap | _IFUNC_ARG_HWCAP, &arg);
+  init_cpu_features_constructor(hwcap | _IFUNC_ARG_HWCAP, &arg);
 #undef extractBits
 #undef getCPUFeature
 #undef setCPUFeature
Index: clang/test/Driver/aarch64-features.c
===
--- clang/test/Driver/aarch64-features.c
+++ clang/test/Driver/aarch64-features.c
@@ -7,12 +7,18 @@
 // CHECK: fno-signed-char
 
 // Check Function Multi Versioning option and rtlib dependency.
-// RUN: %clang --target=aarch64-linux-android -rtlib=compiler-rt \
+// RUN: %clang 

[PATCH] D152914: [Draft] Make __builtin_cpu builtins target-independent

2023-08-21 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

Friendly ping: are there any remaining questions before we proceed with a 
target-independent __builtin_cpu_supports?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152914/new/

https://reviews.llvm.org/D152914

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152914: [Draft] Make __builtin_cpu builtins target-independent

2023-07-24 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: llvm/include/llvm/IR/Intrinsics.td:903-907
+// Load of a value provided by the system library at a fixed address. Used for
+// accessing things like HWCAP word provided by GLIBC.
+def int_fixed_addr_ld
+: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty],
+[IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;

nemanjai wrote:
> arsenm wrote:
> > nemanjai wrote:
> > > arsenm wrote:
> > > > nemanjai wrote:
> > > > > arsenm wrote:
> > > > > > From this description I don't understand what this is supposed to 
> > > > > > do. What does the input mean? Why does this use an i32 immarg and 
> > > > > > not a pointer? Why is the result only i32?
> > > > > That is fair enough. The description is fairly vague. I can try to 
> > > > > improve it as per below. The parameter for this is not a pointer 
> > > > > (i.e. not an address). It is an immediate that represents the index 
> > > > > into the enumeration of values that are provided at a fixed address. 
> > > > > The back end is then free to produce the actual fixed address and the 
> > > > > load itself.
> > > > > The choice for the result type was admittedly arbitrary - on PPC, the 
> > > > > values provided by GLIBC are 32-bit words.
> > > > > 
> > > > > Proposed comment describing this intrinsic:
> > > > > ```
> > > > > // This intrinsic is provided to allow back ends to emit load
> > > > > // instructions that load a value from a fixed address. The
> > > > > // parameter to the intrinsic is not an address, but an
> > > > > // immediate index into an enumeration that contains the
> > > > > // union of all such values available on all back ends.
> > > > > // An example is the HWCAP/HWCAP2/CPUID words
> > > > > // provided by GLIBC on PowerPC to allow fast access
> > > > > // to commonly used parts of AUXV. These are provided
> > > > > // at a fixed offset into the TCB (accessible through the
> > > > > // thread pointer).
> > > > > ```
> > > > This is baking in a very target-specific implementation of device 
> > > > identification. Could you redefine this as something more abstract? 
> > > > Like returns a device ID integer, or a bool that some int input is 
> > > > supported?
> > > The idea is for this to not be restricted to device ID at all, but to be 
> > > used for any values that reside in a fixed address for the compiler. For 
> > > example, `STACK_GUARD` can be one of the values. How about if the 
> > > intrinsic returns any integer type (or maybe any type)?
> > > My thinking is that there may be need in the future for various things to 
> > > be loaded from fixed addresses. The list of possible things that can be 
> > > loaded this way would be a union of what all targets want and only 
> > > specific values would make sense on each target.
> > But you're assuming there's a fixed address this can be loaded from, and 
> > not a read from a special register or some other mechanism
> Oh, well sure. My intent was just for those things that are in memory at a 
> fixed address. But I suppose you're right, there is probably not a 
> fundamental reason to restrict it to things at fixed addresses.
> 
> What I am trying to avoid here is defining an intrinsic that takes a specific 
> value and returns a `bool` (i.e. the intrinsic version of 
> `__builtin_cpu_{supports|is}`. The features/cpus will be different for every 
> target and mapping features to integers is also target specific. So this was 
> an attempt to implement an intrinsic that gets a value (bit vector if you 
> will) that the target can choose how to lower and what to do with the value 
> (i.e. how to mask it).
> 
> I suppose each target can define their own intrinsics for accessing CPU 
> identification information since each target has to provide code generation 
> for them anyway.
I would not restrict targets to specific intrinsics for feature detection. 
Please note that CPU identification is currently more complicated than 
HWCAPs, and includes reads from system registers. It is done in the compiler-rt 
library (**cpu_model.c**), in **init_cpu_features_resolver** for AArch64 and 
**getAvailableFeatures** for X86.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152914/new/

https://reviews.llvm.org/D152914

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152914: [Draft] Make __builtin_cpu builtins target-independent

2023-07-19 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D152914#4497599, @nemanjai wrote:

> I took a quick look at your patch. I think it would be preferable to make the 
> builtins target-independent rather than implementing the builtin by the same 
> name for multiple targets. Although I think it is very useful to support a 
> plus-separated list for `__builtin_cpu_supports()`, I think that's probably 
> something for a subsequent patch. We would need to figure out code generation 
> for that - perhaps that part will have to be completely target specific.

I fully agree with you on making `__builtin_cpu_supports()` target-independent 
(and I will update my patch on top of yours). If the plus-separated format is 
not supported by all targets, then I would suggest making SemaBuiltinCpuSupports 
target-dependent - that will allow me to implement the plus-separated format on 
AArch64 while keeping `__builtin_cpu_supports()` target-independent.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152914/new/

https://reviews.llvm.org/D152914

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152914: [Draft] Make __builtin_cpu builtins target-independent

2023-06-16 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

Thank you for the patch, it comes at the right time - we are also working on 
AArch64 __builtin_cpu_supports, and I was thinking about how to make it more 
general. I uploaded our RFC version for review: https://reviews.llvm.org/D153153
It would be great if the __builtin_cpu_supports argument could be a string of 
plus-separated features. Only SemaBuiltinCpuSupports needs to handle this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152914/new/

https://reviews.llvm.org/D152914

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D153153: [AArch64][RFC][Draft] Implement __builtin_cpu_supports, compiler-rt tests.

2023-06-16 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added reviewers: danielkiss, kristof.beyls, nemanjai, arsenm.
Herald added subscribers: Enna1, dberris.
Herald added a project: All.
ilinpv requested review of this revision.
Herald added subscribers: Sanitizers, cfe-commits, wdng.
Herald added projects: clang, Sanitizers.

The patch adds AArch64 target builtin

  
  __builtin_cpu_supports("feature1+...+featureN")


which returns true if all CPU features specified in the argument are detected.
Native-run AArch64 tests for the compiler-rt feature detection mechanism
were also added.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153153

Files:
  clang/include/clang/Basic/BuiltinsAArch64.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/aarch64-cpu-supports-target.c
  clang/test/CodeGen/aarch64-cpu-supports.c
  clang/test/Sema/aarch64-cpu-supports.c
  compiler-rt/test/builtins/Unit/aarch64_cpu_features_test.c

Index: compiler-rt/test/builtins/Unit/aarch64_cpu_features_test.c
===
--- /dev/null
+++ compiler-rt/test/builtins/Unit/aarch64_cpu_features_test.c
@@ -0,0 +1,17 @@
+// REQUIRES: aarch64-target-arch
+// REQUIRES: native-run
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_cpu_model
+int main(void) {
+  if (__builtin_cpu_supports("fp+simd+pmull+sha2+crc")) {
+if (__builtin_cpu_supports("fp") && __builtin_cpu_supports("simd") &&
+__builtin_cpu_supports("pmull") && __builtin_cpu_supports("sha2") &&
+__builtin_cpu_supports("crc")) {
+  return 0;
+} else {
+  // Something wrong in feature detection
+  return 1;
+}
+  }
+  return 0;
+}
Index: clang/test/Sema/aarch64-cpu-supports.c
===
--- /dev/null
+++ clang/test/Sema/aarch64-cpu-supports.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -fsyntax-only -triple aarch64-linux-gnu -verify %s
+
+int test_aarch64_features(void) {
+  char * ssbs2;
+  // expected-error@+1 {{expression is not a string literal}}
+  if (__builtin_cpu_supports(ssbs2))
+return 1;
+  // expected-error@+1 {{invalid cpu feature string}}
+  if (__builtin_cpu_supports(""))
+return 2;
+  // expected-error@+1 {{invalid cpu feature string}}
+  if (__builtin_cpu_supports("pmull128"))
+return 3;
+  // expected-error@+1 {{invalid cpu feature string}}
+  if (__builtin_cpu_supports("sve2,rpres"))
+return 4;
+  // expected-error@+1 {{invalid cpu feature string}}
+  if (__builtin_cpu_supports("dgh+sve2-pmull"))
+return 5;
+  // expected-error@+1 {{invalid cpu feature string}}
+  if (__builtin_cpu_supports("default"))
+return 6;
+  if (__builtin_cpu_supports(" ssbs + bti "))
+return 7;
+  return 0;
+}
Index: clang/test/CodeGen/aarch64-cpu-supports.c
===
--- /dev/null
+++ clang/test/CodeGen/aarch64-cpu-supports.c
@@ -0,0 +1,54 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 2
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
+// CHECK-LABEL: define dso_local i32 @main
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT:store i32 0, ptr [[RETVAL]], align 4
+// CHECK-NEXT:[[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT:[[TMP1:%.*]] = and i64 [[TMP0]], 70368744177664
+// CHECK-NEXT:[[TMP2:%.*]] = icmp eq i64 [[TMP1]], 70368744177664
+// CHECK-NEXT:[[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-NEXT:br i1 [[TMP3]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+// CHECK:   if.then:
+// CHECK-NEXT:store i32 1, ptr [[RETVAL]], align 4
+// CHECK-NEXT:br label [[RETURN:%.*]]
+// CHECK:   if.end:
+// CHECK-NEXT:[[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT:[[TMP5:%.*]] = and i64 [[TMP4]], 9070970929152
+// CHECK-NEXT:[[TMP6:%.*]] = icmp eq i64 [[TMP5]], 9070970929152
+// CHECK-NEXT:[[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-NEXT:br i1 [[TMP7]], label [[IF_THEN1:%.*]], label [[IF_END2:%.*]]
+// CHECK:   if.then1:
+// CHECK-NEXT:store i32 2, ptr [[RETVAL]], align 4
+// CHECK-NEXT:br label [[RETURN]]
+// CHECK:   if.end2:
+// CHECK-NEXT:[[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-NEXT:[[TMP9:%.*]] = and i64 [[TMP8]], 166633186212708352
+// CHECK-NEXT:[[TMP10:%.*]] = icmp eq i64 [[TMP9]], 166633186212708352
+// CHECK-NEXT:[[TMP11:%.*]] = and i1 true, [[TMP10]]
+// CHECK-NEXT:br i1 [[TMP11]], label [[IF_THEN3:%.*]], label [[IF_END4:%.*]]
+// CHECK:   if.then3:
+// CHECK-NEXT:store i32 3, ptr [[RETVAL]], align 4
+// 

[PATCH] D150867: [AArch64][FMV] Prevent target attribute using for multiversioning.

2023-05-23 Thread Pavel Iliin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGb6db864a2fdf: [AArch64][FMV] Prevent target attribute using 
for multiversioning. (authored by ilinpv).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150867/new/

https://reviews.llvm.org/D150867

Files:
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Sema/attr-target-version.c


Index: clang/test/Sema/attr-target-version.c
===
--- clang/test/Sema/attr-target-version.c
+++ clang/test/Sema/attr-target-version.c
@@ -15,15 +15,22 @@
 //expected-note@+1 {{previous definition is here}}
 int __attribute__((target_version("default"))) foo(void) { return 2; }
 
-//expected-note@+1 {{previous declaration is here}}
+//expected-note@+1 {{previous definition is here}}
 int __attribute__((target_version("sha3 + pmull "))) foo(void) { return 1; }
+//expected-note@-1 {{previous definition is here}}
 
-//expected-error@+1 {{multiversioning attributes cannot be combined}}
+//expected-error@+1 {{redefinition of 'foo'}}
 int __attribute__((target("dotprod"))) foo(void) { return -1; }
+//expected-warning@-1 {{attribute declaration must precede definition}}
 
 //expected-error@+1 {{redefinition of 'foo'}}
 int foo(void) { return 2; }
 
+//expected-note@+1 {{previous definition is here}}
+__attribute__ ((target("bf16,sve,sve2,dotprod"))) int func(void) { return 1; }
+//expected-error@+1 {{redefinition of 'func'}}
+__attribute__ ((target("default"))) int func(void) { return 0; }
+
 //expected-note@+1 {{previous declaration is here}}
 void __attribute__((target_version("bti+flagm2"))) one(void) {}
 //expected-error@+1 {{multiversioned function redeclarations require identical 
target attributes}}
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -11540,6 +11540,10 @@
 return false;
   }
 
+  // Target attribute on AArch64 is not used for multiversioning
+  if (NewTA && S.getASTContext().getTargetInfo().getTriple().isAArch64())
+return false;
+
   if (!OldDecl || !OldDecl->getAsFunction() ||
   OldDecl->getDeclContext()->getRedeclContext() !=
   NewFD->getDeclContext()->getRedeclContext()) {


Index: clang/test/Sema/attr-target-version.c
===
--- clang/test/Sema/attr-target-version.c
+++ clang/test/Sema/attr-target-version.c
@@ -15,15 +15,22 @@
 //expected-note@+1 {{previous definition is here}}
 int __attribute__((target_version("default"))) foo(void) { return 2; }
 
-//expected-note@+1 {{previous declaration is here}}
+//expected-note@+1 {{previous definition is here}}
 int __attribute__((target_version("sha3 + pmull "))) foo(void) { return 1; }
+//expected-note@-1 {{previous definition is here}}
 
-//expected-error@+1 {{multiversioning attributes cannot be combined}}
+//expected-error@+1 {{redefinition of 'foo'}}
 int __attribute__((target("dotprod"))) foo(void) { return -1; }
+//expected-warning@-1 {{attribute declaration must precede definition}}
 
 //expected-error@+1 {{redefinition of 'foo'}}
 int foo(void) { return 2; }
 
+//expected-note@+1 {{previous definition is here}}
+__attribute__ ((target("bf16,sve,sve2,dotprod"))) int func(void) { return 1; }
+//expected-error@+1 {{redefinition of 'func'}}
+__attribute__ ((target("default"))) int func(void) { return 0; }
+
 //expected-note@+1 {{previous declaration is here}}
 void __attribute__((target_version("bti+flagm2"))) one(void) {}
 //expected-error@+1 {{multiversioned function redeclarations require identical target attributes}}
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -11540,6 +11540,10 @@
 return false;
   }
 
+  // Target attribute on AArch64 is not used for multiversioning
+  if (NewTA && S.getASTContext().getTargetInfo().getTriple().isAArch64())
+return false;
+
   if (!OldDecl || !OldDecl->getAsFunction() ||
   OldDecl->getDeclContext()->getRedeclContext() !=
   NewFD->getDeclContext()->getRedeclContext()) {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150800: [AArch64][FMV] Fix name mangling.

2023-05-23 Thread Pavel Iliin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG30b0fdfff193: [AArch64][FMV] Fix name mangling. (authored by 
ilinpv).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150800/new/

https://reviews.llvm.org/D150800

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGen/attr-target-clones-aarch64.c
  clang/test/CodeGen/attr-target-version.c
  clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
  clang/test/CodeGenCXX/attr-target-version.cpp

Index: clang/test/CodeGenCXX/attr-target-version.cpp
===
--- clang/test/CodeGenCXX/attr-target-version.cpp
+++ clang/test/CodeGenCXX/attr-target-version.cpp
@@ -27,7 +27,7 @@
 // CHECK: @_Z3fooi.ifunc = weak_odr ifunc i32 (i32), ptr @_Z3fooi.resolver
 // CHECK: @_Z3foov.ifunc = weak_odr ifunc i32 (), ptr @_Z3foov.resolver
 
-// CHECK-LABEL: @_Z3fooi._Msme-f64f64Mbf16(
+// CHECK-LABEL: @_Z3fooi._Mbf16Msme-f64f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4
@@ -105,7 +105,7 @@
 // CHECK-NEXT:[[TMP3:%.*]] = and i1 true, [[TMP2]]
 // CHECK-NEXT:br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
 // CHECK:   resolver_return:
-// CHECK-NEXT:ret ptr @_Z3fooi._Msme-f64f64Mbf16
+// CHECK-NEXT:ret ptr @_Z3fooi._Mbf16Msme-f64f64
 // CHECK:   resolver_else:
 // CHECK-NEXT:ret ptr @_Z3fooi
 // CHECK-LABEL: @_Z3foov.resolver(
Index: clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
===
--- clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
+++ clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
@@ -108,7 +108,7 @@
 // CHECK-NEXT:[[TMP3:%.*]] = and i1 true, [[TMP2]]
 // CHECK-NEXT:br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
 // CHECK:   resolver_return:
-// CHECK-NEXT:ret ptr @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs
+// CHECK-NEXT:ret ptr @_ZN7MyClassIssE7foo_tmlEv._MssbsMsme-f64f64
 // CHECK:   resolver_else:
 // CHECK-NEXT:[[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
 // CHECK-NEXT:[[TMP5:%.*]] = and i64 [[TMP4]], 16777216
@@ -128,7 +128,7 @@
 // CHECK-NEXT:[[TMP3:%.*]] = and i1 true, [[TMP2]]
 // CHECK-NEXT:br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
 // CHECK:   resolver_return:
-// CHECK-NEXT:ret ptr @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs
+// CHECK-NEXT:ret ptr @_ZN7MyClassIisE7foo_tmlEv._MssbsMsme-f64f64
 // CHECK:   resolver_else:
 // CHECK-NEXT:[[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
 // CHECK-NEXT:[[TMP5:%.*]] = and i64 [[TMP4]], 16777216
@@ -157,7 +157,7 @@
 // CHECK-NEXT:store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:[[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:ret i32 1
-// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs(
+// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv._MssbsMsme-f64f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[THIS_ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
@@ -175,7 +175,7 @@
 // CHECK-NEXT:store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:[[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:ret i32 2
-// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs(
+// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv._MssbsMsme-f64f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[THIS_ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
Index: clang/test/CodeGen/attr-target-version.c
===
--- clang/test/CodeGen/attr-target-version.c
+++ clang/test/CodeGen/attr-target-version.c
@@ -119,7 +119,7 @@
 // CHECK-LABEL: @fmv(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:ret i32 0
-// CHECK-LABEL: @fmv_one._Mls64Msimd(
+// CHECK-LABEL: @fmv_one._MsimdMls64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:ret i32 1
 // CHECK-LABEL: @fmv_one._Mdpb(
@@ -137,7 +137,7 @@
 // CHECK-LABEL: @fmv_two._Mdgh(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:ret i32 3
-// CHECK-LABEL: @fmv_two._Mfp16Msimd(
+// CHECK-LABEL: @fmv_two._MsimdMfp16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:ret i32 4
 // CHECK-LABEL: @fmv_two(
@@ -229,10 +229,10 @@
 // CHECK-NEXT:ret ptr @fmv
 // CHECK-LABEL: @fmv_one.resolver(
 // CHECK-NEXT:  resolver_entry:
-// CHECK-NEXT:ret ptr @fmv_one._Mls64Msimd
+// CHECK-NEXT:ret ptr @fmv_one._MsimdMls64
 // CHECK-LABEL: @fmv_two.resolver(
 // CHECK-NEXT:  resolver_entry:
-// CHECK-NEXT:ret ptr @fmv_two._Mfp16Msimd
+// CHECK-NEXT:ret ptr @fmv_two._MsimdMfp16
 // CHECK-LABEL: @fmv_e(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:   

[PATCH] D150867: [AArch64][FMV] Prevent target attribute using for multiversioning.

2023-05-18 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added a reviewer: danielkiss.
Herald added a subscriber: kristof.beyls.
Herald added a project: All.
ilinpv requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

On AArch64, the target_version/target_clones attributes should be used for 
function multiversioning. The patch fixes a defect that allowed the target 
attribute to trigger multiversioning.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D150867

Files:
  clang/lib/Sema/SemaDecl.cpp
  clang/test/Sema/attr-target-version.c


Index: clang/test/Sema/attr-target-version.c
===
--- clang/test/Sema/attr-target-version.c
+++ clang/test/Sema/attr-target-version.c
@@ -15,15 +15,22 @@
 //expected-note@+1 {{previous definition is here}}
 int __attribute__((target_version("default"))) foo(void) { return 2; }
 
-//expected-note@+1 {{previous declaration is here}}
+//expected-note@+1 {{previous definition is here}}
 int __attribute__((target_version("sha3 + pmull "))) foo(void) { return 1; }
+//expected-note@-1 {{previous definition is here}}
 
-//expected-error@+1 {{multiversioning attributes cannot be combined}}
+//expected-error@+1 {{redefinition of 'foo'}}
 int __attribute__((target("dotprod"))) foo(void) { return -1; }
+//expected-warning@-1 {{attribute declaration must precede definition}}
 
 //expected-error@+1 {{redefinition of 'foo'}}
 int foo(void) { return 2; }
 
+//expected-note@+1 {{previous definition is here}}
+__attribute__ ((target("bf16,sve,sve2,dotprod"))) int func(void) { return 1; }
+//expected-error@+1 {{redefinition of 'func'}}
+__attribute__ ((target("default"))) int func(void) { return 0; }
+
 //expected-note@+1 {{previous declaration is here}}
 void __attribute__((target_version("bti+flagm2"))) one(void) {}
 //expected-error@+1 {{multiversioned function redeclarations require identical 
target attributes}}
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -11540,6 +11540,10 @@
 return false;
   }
 
+  // Target attribute on AArch64 is not used for multiversioning
+  if (NewTA && S.getASTContext().getTargetInfo().getTriple().isAArch64())
+return false;
+
   if (!OldDecl || !OldDecl->getAsFunction() ||
   OldDecl->getDeclContext()->getRedeclContext() !=
   NewFD->getDeclContext()->getRedeclContext()) {


Index: clang/test/Sema/attr-target-version.c
===
--- clang/test/Sema/attr-target-version.c
+++ clang/test/Sema/attr-target-version.c
@@ -15,15 +15,22 @@
 //expected-note@+1 {{previous definition is here}}
 int __attribute__((target_version("default"))) foo(void) { return 2; }
 
-//expected-note@+1 {{previous declaration is here}}
+//expected-note@+1 {{previous definition is here}}
 int __attribute__((target_version("sha3 + pmull "))) foo(void) { return 1; }
+//expected-note@-1 {{previous definition is here}}
 
-//expected-error@+1 {{multiversioning attributes cannot be combined}}
+//expected-error@+1 {{redefinition of 'foo'}}
 int __attribute__((target("dotprod"))) foo(void) { return -1; }
+//expected-warning@-1 {{attribute declaration must precede definition}}
 
 //expected-error@+1 {{redefinition of 'foo'}}
 int foo(void) { return 2; }
 
+//expected-note@+1 {{previous definition is here}}
+__attribute__ ((target("bf16,sve,sve2,dotprod"))) int func(void) { return 1; }
+//expected-error@+1 {{redefinition of 'func'}}
+__attribute__ ((target("default"))) int func(void) { return 0; }
+
 //expected-note@+1 {{previous declaration is here}}
 void __attribute__((target_version("bti+flagm2"))) one(void) {}
 //expected-error@+1 {{multiversioned function redeclarations require identical target attributes}}
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -11540,6 +11540,10 @@
 return false;
   }
 
+  // Target attribute on AArch64 is not used for multiversioning
+  if (NewTA && S.getASTContext().getTargetInfo().getTriple().isAArch64())
+return false;
+
   if (!OldDecl || !OldDecl->getAsFunction() ||
   OldDecl->getDeclContext()->getRedeclContext() !=
   NewFD->getDeclContext()->getRedeclContext()) {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150800: [AArch64][FMV] Fix name mangling.

2023-05-17 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added a reviewer: danielkiss.
Herald added a subscriber: kristof.beyls.
Herald added a project: All.
ilinpv requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Put features into function version name in increasing priority order.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D150800

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGen/attr-target-clones-aarch64.c
  clang/test/CodeGen/attr-target-version.c
  clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
  clang/test/CodeGenCXX/attr-target-version.cpp

Index: clang/test/CodeGenCXX/attr-target-version.cpp
===
--- clang/test/CodeGenCXX/attr-target-version.cpp
+++ clang/test/CodeGenCXX/attr-target-version.cpp
@@ -27,7 +27,7 @@
 // CHECK: @_Z3fooi.ifunc = weak_odr ifunc i32 (i32), ptr @_Z3fooi.resolver
 // CHECK: @_Z3foov.ifunc = weak_odr ifunc i32 (), ptr @_Z3foov.resolver
 
-// CHECK-LABEL: @_Z3fooi._Msme-f64f64Mbf16(
+// CHECK-LABEL: @_Z3fooi._Mbf16Msme-f64f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[DOTADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:store i32 [[TMP0:%.*]], ptr [[DOTADDR]], align 4
@@ -105,7 +105,7 @@
 // CHECK-NEXT:[[TMP3:%.*]] = and i1 true, [[TMP2]]
 // CHECK-NEXT:br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
 // CHECK:   resolver_return:
-// CHECK-NEXT:ret ptr @_Z3fooi._Msme-f64f64Mbf16
+// CHECK-NEXT:ret ptr @_Z3fooi._Mbf16Msme-f64f64
 // CHECK:   resolver_else:
 // CHECK-NEXT:ret ptr @_Z3fooi
 // CHECK-LABEL: @_Z3foov.resolver(
Index: clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
===
--- clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
+++ clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp
@@ -108,7 +108,7 @@
 // CHECK-NEXT:[[TMP3:%.*]] = and i1 true, [[TMP2]]
 // CHECK-NEXT:br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
 // CHECK:   resolver_return:
-// CHECK-NEXT:ret ptr @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs
+// CHECK-NEXT:ret ptr @_ZN7MyClassIssE7foo_tmlEv._MssbsMsme-f64f64
 // CHECK:   resolver_else:
 // CHECK-NEXT:[[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
 // CHECK-NEXT:[[TMP5:%.*]] = and i64 [[TMP4]], 16777216
@@ -128,7 +128,7 @@
 // CHECK-NEXT:[[TMP3:%.*]] = and i1 true, [[TMP2]]
 // CHECK-NEXT:br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
 // CHECK:   resolver_return:
-// CHECK-NEXT:ret ptr @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs
+// CHECK-NEXT:ret ptr @_ZN7MyClassIisE7foo_tmlEv._MssbsMsme-f64f64
 // CHECK:   resolver_else:
 // CHECK-NEXT:[[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
 // CHECK-NEXT:[[TMP5:%.*]] = and i64 [[TMP4]], 16777216
@@ -157,7 +157,7 @@
 // CHECK-NEXT:store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:[[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:ret i32 1
-// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv._Msme-f64f64Mssbs(
+// CHECK-LABEL: @_ZN7MyClassIssE7foo_tmlEv._MssbsMsme-f64f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[THIS_ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
@@ -175,7 +175,7 @@
 // CHECK-NEXT:store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:[[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:ret i32 2
-// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv._Msme-f64f64Mssbs(
+// CHECK-LABEL: @_ZN7MyClassIisE7foo_tmlEv._MssbsMsme-f64f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[THIS_ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
Index: clang/test/CodeGen/attr-target-version.c
===
--- clang/test/CodeGen/attr-target-version.c
+++ clang/test/CodeGen/attr-target-version.c
@@ -119,7 +119,7 @@
 // CHECK-LABEL: @fmv(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:ret i32 0
-// CHECK-LABEL: @fmv_one._Mls64Msimd(
+// CHECK-LABEL: @fmv_one._MsimdMls64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:ret i32 1
 // CHECK-LABEL: @fmv_one._Mdpb(
@@ -137,7 +137,7 @@
 // CHECK-LABEL: @fmv_two._Mdgh(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:ret i32 3
-// CHECK-LABEL: @fmv_two._Mfp16Msimd(
+// CHECK-LABEL: @fmv_two._MsimdMfp16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:ret i32 4
 // CHECK-LABEL: @fmv_two(
@@ -229,10 +229,10 @@
 // CHECK-NEXT:ret ptr @fmv
 // CHECK-LABEL: @fmv_one.resolver(
 // CHECK-NEXT:  resolver_entry:
-// CHECK-NEXT:ret ptr @fmv_one._Mls64Msimd
+// CHECK-NEXT:ret ptr @fmv_one._MsimdMls64
 // CHECK-LABEL: @fmv_two.resolver(
 // CHECK-NEXT:  resolver_entry:
-// CHECK-NEXT:ret ptr @fmv_two._Mfp16Msimd
+// CHECK-NEXT:ret 

[PATCH] D145538: [NFC][AArch64] Document and improve FMV code.

2023-03-08 Thread Pavel Iliin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG124b46a897a7: [NFC][AArch64] Document and improve FMV code. 
(authored by ilinpv).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145538/new/

https://reviews.llvm.org/D145538

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Basic/Targets/AArch64.h
  clang/lib/Sema/SemaDeclAttr.cpp
  llvm/include/llvm/TargetParser/AArch64TargetParser.h

Index: llvm/include/llvm/TargetParser/AArch64TargetParser.h
===
--- llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -25,6 +25,9 @@
 class Triple;
 
 namespace AArch64 {
+// Function Multi Versioning CPU features. They must be kept in sync with
+// compiler-rt enum CPUFeatures in lib/builtins/cpu_model.c with FEAT_MAX as
+// sentinel.
 enum CPUFeatures {
   FEAT_RNG,
   FEAT_FLAGM,
@@ -87,6 +90,9 @@
   FEAT_MAX
 };
 
+static_assert(FEAT_MAX <= 64,
+  "CPUFeatures enum must not have more than 64 entries");
+
 // Arch extension modifiers for CPUs. These are labelled with their Arm ARM
 // feature name (though the canonical reference for those is AArch64.td)
 // clang-format off
@@ -155,17 +161,18 @@
 // SubtargetFeature which may represent either an actual extension or some
 // internal LLVM property.
 struct ExtensionInfo {
-  StringRef Name;   // Human readable name, e.g. "profile".
-  ArchExtKind ID;   // Corresponding to the ArchExtKind, this extensions
-// representation in the bitfield.
-  StringRef Feature;// -mattr enable string, e.g. "+spe"
-  StringRef NegFeature; // -mattr disable string, e.g. "-spe"
-
-  // FIXME These were added by D127812 FMV support and need documenting:
-  CPUFeatures CPUFeature; // Bitfield value set in __aarch64_cpu_features
-  StringRef DependentFeatures;
-  unsigned FmvPriority;
-  static constexpr unsigned MaxFMVPriority = 1000;
+  StringRef Name;  // Human readable name, e.g. "profile".
+  ArchExtKind ID;  // Corresponding to the ArchExtKind, this
+   // extensions representation in the bitfield.
+  StringRef Feature;   // -mattr enable string, e.g. "+spe"
+  StringRef NegFeature;// -mattr disable string, e.g. "-spe"
+  CPUFeatures CPUFeature;  // Function Multi Versioning (FMV) bitfield value
+   // set in __aarch64_cpu_features
+  StringRef DependentFeatures; // FMV enabled features string,
+   // e.g. "+dotprod,+fp-armv8,+neon"
+  unsigned FmvPriority;// FMV feature priority
+  static constexpr unsigned MaxFMVPriority =
+  1000; // Maximum priority for FMV feature
 };
 
 // clang-format off
@@ -559,6 +566,10 @@
 void fillValidCPUArchList(SmallVectorImpl );
 
 bool isX18ReservedByDefault(const Triple );
+
+// For given feature names, return a bitmask corresponding to the entries of
+// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks
+// themselves, they are sequential (0, 1, 2, 3, ...).
 uint64_t getCpuSupportsMask(ArrayRef FeatureStrs);
 
 } // namespace AArch64
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -3508,6 +3508,7 @@
   enum SecondParam { None, CPU, Tune };
   enum ThirdParam { Target, TargetClones };
   HasCommas = HasCommas || Str.contains(',');
+  const TargetInfo  = Context.getTargetInfo();
   // Warn on empty at the beginning of a string.
   if (Str.size() == 0)
 return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
@@ -3517,9 +3518,9 @@
   while (!Parts.second.empty()) {
 Parts = Parts.second.split(',');
 StringRef Cur = Parts.first.trim();
-SourceLocation CurLoc = Literal->getLocationOfByte(
-Cur.data() - Literal->getString().data(), getSourceManager(),
-getLangOpts(), Context.getTargetInfo());
+SourceLocation CurLoc =
+Literal->getLocationOfByte(Cur.data() - Literal->getString().data(),
+   getSourceManager(), getLangOpts(), TInfo);
 
 bool DefaultIsDupe = false;
 bool HasCodeGenImpact = false;
@@ -3527,7 +3528,7 @@
   return Diag(CurLoc, diag::warn_unsupported_target_attribute)
  << Unsupported << None << "" << TargetClones;
 
-if (Context.getTargetInfo().getTriple().isAArch64()) {
+if (TInfo.getTriple().isAArch64()) {
   // AArch64 target clones specific
   if (Cur == "default") {
 DefaultIsDupe = HasDefault;
@@ -3542,13 +3543,12 @@
 while (!CurParts.second.empty()) {
   CurParts = CurParts.second.split('+');
   StringRef CurFeature = CurParts.first.trim();
-  if 

[PATCH] D145538: [NFC][AArch64] Document and improve FMV code.

2023-03-08 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv marked 5 inline comments as done.
ilinpv added inline comments.



Comment at: llvm/include/llvm/TargetParser/AArch64TargetParser.h:567-568
+
+// For given features returns a mask to check if CPU support them. The mask is
+// used in Function Multi Versioning resolver conditions code generation.
 uint64_t getCpuSupportsMask(ArrayRef FeatureStrs);

tmatheson wrote:
> `CPUFeatures` has 60 entries, which means the return value here will overflow 
> if we add a few more entries. We should probably have a 
> `static_assert(FEAT_MAX <= 64)` in the implementation. Or should the 
> `CPUFeatures` values actually be bitmasks, like ArchExtKind?
static_assert added. I think changing values to masks could be done separately, 
it would be good to have if we eventually come to CPUFeatures and ArchExtKind 
unification. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145538/new/

https://reviews.llvm.org/D145538

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145538: [NFC][AArch64] Document and improve FMV code.

2023-03-08 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 503446.
ilinpv added a comment.

Rebasing and addressing comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145538/new/

https://reviews.llvm.org/D145538

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Basic/Targets/AArch64.h
  clang/lib/Sema/SemaDeclAttr.cpp
  llvm/include/llvm/TargetParser/AArch64TargetParser.h

Index: llvm/include/llvm/TargetParser/AArch64TargetParser.h
===
--- llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -25,6 +25,9 @@
 class Triple;
 
 namespace AArch64 {
+// Function Multi Versioning CPU features. They must be kept in sync with
+// compiler-rt enum CPUFeatures in lib/builtins/cpu_model.c with FEAT_MAX as
+// sentinel.
 enum CPUFeatures {
   FEAT_RNG,
   FEAT_FLAGM,
@@ -87,6 +90,9 @@
   FEAT_MAX
 };
 
+static_assert(FEAT_MAX <= 64,
+  "CPUFeatures enum must not have more than 64 entries");
+
 // Arch extension modifiers for CPUs. These are labelled with their Arm ARM
 // feature name (though the canonical reference for those is AArch64.td)
 // clang-format off
@@ -155,17 +161,18 @@
 // SubtargetFeature which may represent either an actual extension or some
 // internal LLVM property.
 struct ExtensionInfo {
-  StringRef Name;   // Human readable name, e.g. "profile".
-  ArchExtKind ID;   // Corresponding to the ArchExtKind, this extensions
-// representation in the bitfield.
-  StringRef Feature;// -mattr enable string, e.g. "+spe"
-  StringRef NegFeature; // -mattr disable string, e.g. "-spe"
-
-  // FIXME These were added by D127812 FMV support and need documenting:
-  CPUFeatures CPUFeature; // Bitfield value set in __aarch64_cpu_features
-  StringRef DependentFeatures;
-  unsigned FmvPriority;
-  static constexpr unsigned MaxFMVPriority = 1000;
+  StringRef Name;  // Human readable name, e.g. "profile".
+  ArchExtKind ID;  // Corresponding to the ArchExtKind, this
+   // extensions representation in the bitfield.
+  StringRef Feature;   // -mattr enable string, e.g. "+spe"
+  StringRef NegFeature;// -mattr disable string, e.g. "-spe"
+  CPUFeatures CPUFeature;  // Function Multi Versioning (FMV) bitfield value
+   // set in __aarch64_cpu_features
+  StringRef DependentFeatures; // FMV enabled features string,
+   // e.g. "+dotprod,+fp-armv8,+neon"
+  unsigned FmvPriority;// FMV feature priority
+  static constexpr unsigned MaxFMVPriority =
+  1000; // Maximum priority for FMV feature
 };
 
 // clang-format off
@@ -559,6 +566,10 @@
 void fillValidCPUArchList(SmallVectorImpl );
 
 bool isX18ReservedByDefault(const Triple );
+
+// For given feature names, return a bitmask corresponding to the entries of
+// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks
+// themselves, they are sequential (0, 1, 2, 3, ...).
 uint64_t getCpuSupportsMask(ArrayRef FeatureStrs);
 
 } // namespace AArch64
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -3508,6 +3508,7 @@
   enum SecondParam { None, CPU, Tune };
   enum ThirdParam { Target, TargetClones };
   HasCommas = HasCommas || Str.contains(',');
+  const TargetInfo  = Context.getTargetInfo();
   // Warn on empty at the beginning of a string.
   if (Str.size() == 0)
 return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
@@ -3517,9 +3518,9 @@
   while (!Parts.second.empty()) {
 Parts = Parts.second.split(',');
 StringRef Cur = Parts.first.trim();
-SourceLocation CurLoc = Literal->getLocationOfByte(
-Cur.data() - Literal->getString().data(), getSourceManager(),
-getLangOpts(), Context.getTargetInfo());
+SourceLocation CurLoc =
+Literal->getLocationOfByte(Cur.data() - Literal->getString().data(),
+   getSourceManager(), getLangOpts(), TInfo);
 
 bool DefaultIsDupe = false;
 bool HasCodeGenImpact = false;
@@ -3527,7 +3528,7 @@
   return Diag(CurLoc, diag::warn_unsupported_target_attribute)
  << Unsupported << None << "" << TargetClones;
 
-if (Context.getTargetInfo().getTriple().isAArch64()) {
+if (TInfo.getTriple().isAArch64()) {
   // AArch64 target clones specific
   if (Cur == "default") {
 DefaultIsDupe = HasDefault;
@@ -3542,13 +3543,12 @@
 while (!CurParts.second.empty()) {
   CurParts = CurParts.second.split('+');
   StringRef CurFeature = CurParts.first.trim();
-  if (!Context.getTargetInfo().validateCpuSupports(CurFeature)) {
+  if 

[PATCH] D145538: [NFC][AArch64] Document and improve FMV code.

2023-03-07 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added reviewers: tmatheson, danielkiss.
Herald added a subscriber: kristof.beyls.
Herald added a reviewer: aaron.ballman.
Herald added a project: All.
ilinpv requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D145538

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Basic/Targets/AArch64.h
  clang/lib/Sema/SemaDeclAttr.cpp
  llvm/include/llvm/TargetParser/AArch64TargetParser.h

Index: llvm/include/llvm/TargetParser/AArch64TargetParser.h
===
--- llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -25,6 +25,9 @@
 class Triple;
 
 namespace AArch64 {
+// Function Multi Versioning CPU features. They must be kept in sync with
+// compiler-rt enum CPUFeatures in lib/builtins/cpu_model.c with FEAT_MAX as
+// sentinel.
 enum CPUFeatures {
   FEAT_RNG,
   FEAT_FLAGM,
@@ -155,17 +158,18 @@
 // SubtargetFeature which may represent either an actual extension or some
 // internal LLVM property.
 struct ExtensionInfo {
-  StringRef Name;   // Human readable name, e.g. "profile".
-  ArchExtKind ID;   // Corresponding to the ArchExtKind, this extensions
-// representation in the bitfield.
-  StringRef Feature;// -mattr enable string, e.g. "+spe"
-  StringRef NegFeature; // -mattr disable string, e.g. "-spe"
-
-  // FIXME These were added by D127812 FMV support and need documenting:
-  CPUFeatures CPUFeature; // Bitfield value set in __aarch64_cpu_features
-  StringRef DependentFeatures;
-  unsigned FmvPriority;
-  static constexpr unsigned MaxFMVPriority = 1000;
+  StringRef Name;  // Human readable name, e.g. "profile".
+  ArchExtKind ID;  // Corresponding to the ArchExtKind, this
+   // extensions representation in the bitfield.
+  StringRef Feature;   // -mattr enable string, e.g. "+spe"
+  StringRef NegFeature;// -mattr disable string, e.g. "-spe"
+  CPUFeatures CPUFeature;  // Function Multi Versioning (FMV) bitfield value
+   // set in __aarch64_cpu_features
+  StringRef DependentFeatures; // FMV enabled features string,
+   // e.g. "+dotprod,+fp-armv8,+neon"
+  unsigned FmvPriority;// FMV feature priority
+  static constexpr unsigned MaxFMVPriority =
+  1000; // Maximum priority for FMV feature
 };
 
 // clang-format off
@@ -559,6 +563,9 @@
 void fillValidCPUArchList(SmallVectorImpl );
 
 bool isX18ReservedByDefault(const Triple );
+
+// For given features returns a mask to check if CPU support them. The mask is
+// used in Function Multi Versioning resolver conditions code generation.
 uint64_t getCpuSupportsMask(ArrayRef FeatureStrs);
 
 } // namespace AArch64
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -3508,6 +3508,7 @@
   enum SecondParam { None, CPU, Tune };
   enum ThirdParam { Target, TargetClones };
   HasCommas = HasCommas || Str.contains(',');
+  const TargetInfo  = Context.getTargetInfo();
   // Warn on empty at the beginning of a string.
   if (Str.size() == 0)
 return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
@@ -3517,9 +3518,9 @@
   while (!Parts.second.empty()) {
 Parts = Parts.second.split(',');
 StringRef Cur = Parts.first.trim();
-SourceLocation CurLoc = Literal->getLocationOfByte(
-Cur.data() - Literal->getString().data(), getSourceManager(),
-getLangOpts(), Context.getTargetInfo());
+SourceLocation CurLoc =
+Literal->getLocationOfByte(Cur.data() - Literal->getString().data(),
+   getSourceManager(), getLangOpts(), TInfo);
 
 bool DefaultIsDupe = false;
 bool HasCodeGenImpact = false;
@@ -3527,7 +3528,7 @@
   return Diag(CurLoc, diag::warn_unsupported_target_attribute)
  << Unsupported << None << "" << TargetClones;
 
-if (Context.getTargetInfo().getTriple().isAArch64()) {
+if (TInfo.getTriple().isAArch64()) {
   // AArch64 target clones specific
   if (Cur == "default") {
 DefaultIsDupe = HasDefault;
@@ -3542,13 +3543,12 @@
 while (!CurParts.second.empty()) {
   CurParts = CurParts.second.split('+');
   StringRef CurFeature = CurParts.first.trim();
-  if (!Context.getTargetInfo().validateCpuSupports(CurFeature)) {
+  if (!TInfo.validateCpuSupports(CurFeature)) {
 Diag(CurLoc, diag::warn_unsupported_target_attribute)
 << Unsupported << None << CurFeature << TargetClones;
 continue;
   }
-   

[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2023-03-07 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: compiler-rt/lib/builtins/cpu_model.c:1338
+  hwcap = getauxval(AT_HWCAP);
+  hwcap2 = getauxval(AT_HWCAP2);
+#endif // defined(__FreeBSD__)

nikic wrote:
> This breaks the build with glibc 2.17.
Thanks for the fix in https://reviews.llvm.org/D145494. AT_HWCAP2 was not defined.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2023-01-30 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4085171 , @tmatheson wrote:

> This patch has made it considerably harder to understand what is going on in 
> the TargetParser. If you get a chance, please could you add some clarifying 
> comments and tidy-ups. I appreciate that a lot of this is following the lead 
> of the pre-existing TargetParser code, but lets try to improve it as we go.

I fully agree, thank you for the valuable comments! Let me address them in a
separate patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D142265: [AArch64] Function multi-versioning release notes added. NFC.

2023-01-23 Thread Pavel Iliin via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc57eaf1c8ce3: [AArch64] Function multi-versioning release 
notes added. NFC. (authored by ilinpv).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D142265/new/

https://reviews.llvm.org/D142265

Files:
  clang/docs/ReleaseNotes.rst
  llvm/docs/ReleaseNotes.rst


Index: llvm/docs/ReleaseNotes.rst
===
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -133,6 +133,10 @@
 * Added support for the Neoverse V2 CPU.
 * Added support for assembly for RME MEC (Memory Encryption Contexts).
 * Added codegen support for the Armv8.3 Complex Number extension.
+* Implemented `Function Multi Versioning
+  
`_
+  in accordance with Arm C Language Extensions specification. Currently in Beta
+  state.
 
 Changes to the AMDGPU Backend
 -
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -614,6 +614,15 @@
   memory placement. It emits a warning if something in the code provably 
prevents
   an instance from a read-only memory placement.
 
+- Introduced new attribute ``__attribute__((target_version("cpu_features")))``
+  and expanded the functionality of the existing attribute
+  ``__attribute__((target_clones("cpu_features1","cpu_features2",...)))`` to
+  support Function Multi Versioning on AArch64 target. It detects at runtime
+  which function versions are supported by CPU and calls the one with highest
+  priority. Refer to `clang attributes
+  `_ for
+  more details.
+
 Windows Support
 ---
 - For the MinGW driver, added the options ``-mguard=none``, ``-mguard=cf`` and


Index: llvm/docs/ReleaseNotes.rst
===
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -133,6 +133,10 @@
 * Added support for the Neoverse V2 CPU.
 * Added support for assembly for RME MEC (Memory Encryption Contexts).
 * Added codegen support for the Armv8.3 Complex Number extension.
+* Implemented `Function Multi Versioning
+  `_
+  in accordance with Arm C Language Extensions specification. Currently in Beta
+  state.
 
 Changes to the AMDGPU Backend
 -
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -614,6 +614,15 @@
   memory placement. It emits a warning if something in the code provably prevents
   an instance from a read-only memory placement.
 
+- Introduced new attribute ``__attribute__((target_version("cpu_features")))``
+  and expanded the functionality of the existing attribute
+  ``__attribute__((target_clones("cpu_features1","cpu_features2",...)))`` to
+  support Function Multi Versioning on AArch64 target. It detects at runtime
+  which function versions are supported by CPU and calls the one with highest
+  priority. Refer to `clang attributes
+  `_ for
+  more details.
+
 Windows Support
 ---
 - For the MinGW driver, added the options ``-mguard=none``, ``-mguard=cf`` and
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141606: [AArch64] Remove AES, SHA2, SHA3 and SM4 features from armv8.6-a+

2023-01-23 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

Sorry, commit rG5474d7d93271 is not related to this; I put the wrong
differential revision link.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141606/new/

https://reviews.llvm.org/D141606

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141606: [AArch64] Remove AES, SHA2, SHA3 and SM4 features from armv8.6-a+

2023-01-23 Thread Pavel Iliin via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5474d7d93271: [AArch64] Function multi-versioning release 
notes added. NFC. (authored by ilinpv).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Changed prior to commit:
  https://reviews.llvm.org/D141606?vs=489558=491430#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141606/new/

https://reviews.llvm.org/D141606

Files:
  clang/docs/ReleaseNotes.rst
  llvm/docs/ReleaseNotes.rst


Index: llvm/docs/ReleaseNotes.rst
===
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -133,6 +133,10 @@
 * Added support for the Neoverse V2 CPU.
 * Added support for assembly for RME MEC (Memory Encryption Contexts).
 * Added codegen support for the Armv8.3 Complex Number extension.
+* Implemented `Function Multi Versioning
+  
`_
+  in accordance with Arm C Language Extensions specification. Currently in Beta
+  state.
 
 Changes to the AMDGPU Backend
 -
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -614,6 +614,15 @@
   memory placement. It emits a warning if something in the code provably 
prevents
   an instance from a read-only memory placement.
 
+- Introduced new attribute ``__attribute__((target_version("cpu_features")))``
+  and expanded the functionality of the existing attribute
+  ``__attribute__((target_clones("cpu_features1","cpu_features2",...)))`` to
+  support Function Multi Versioning on AArch64 target. It detects at runtime
+  which function versions are supported by CPU and calls the one with highest
+  priority. Refer to `clang attributes
+  `_ for
+  more details.
+
 Windows Support
 ---
 - For the MinGW driver, added the options ``-mguard=none``, ``-mguard=cf`` and


Index: llvm/docs/ReleaseNotes.rst
===
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -133,6 +133,10 @@
 * Added support for the Neoverse V2 CPU.
 * Added support for assembly for RME MEC (Memory Encryption Contexts).
 * Added codegen support for the Armv8.3 Complex Number extension.
+* Implemented `Function Multi Versioning
+  `_
+  in accordance with Arm C Language Extensions specification. Currently in Beta
+  state.
 
 Changes to the AMDGPU Backend
 -
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -614,6 +614,15 @@
   memory placement. It emits a warning if something in the code provably prevents
   an instance from a read-only memory placement.
 
+- Introduced new attribute ``__attribute__((target_version("cpu_features")))``
+  and expanded the functionality of the existing attribute
+  ``__attribute__((target_clones("cpu_features1","cpu_features2",...)))`` to
+  support Function Multi Versioning on AArch64 target. It detects at runtime
+  which function versions are supported by CPU and calls the one with highest
+  priority. Refer to `clang attributes
+  `_ for
+  more details.
+
 Windows Support
 ---
 - For the MinGW driver, added the options ``-mguard=none``, ``-mguard=cf`` and
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D142265: [AArch64] Function multi-versioning release notes added. NFC.

2023-01-20 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added reviewers: danielkiss, samtebbs, dmgreen.
Herald added a subscriber: kristof.beyls.
Herald added a project: All.
ilinpv requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D142265

Files:
  clang/docs/ReleaseNotes.rst
  llvm/docs/ReleaseNotes.rst


Index: llvm/docs/ReleaseNotes.rst
===
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -133,6 +133,10 @@
 * Added support for the Neoverse V2 CPU.
 * Added support for assembly for RME MEC (Memory Encryption Contexts).
 * Added codegen support for the Armv8.3 Complex Number extension.
+* Implemented `Function Multi Versioning
+  
`_
+  in accordance with Arm C Language Extensions specification. Currently in Beta
+  state.
 
 Changes to the AMDGPU Backend
 -
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -614,6 +614,15 @@
   memory placement. It emits a warning if something in the code provably 
prevents
   an instance from a read-only memory placement.
 
+- Introduced new attribute ``__attribute__((target_version("cpu_features")))``
+  and expanded the functionality of the existing attribute
+  ``__attribute__((target_clones("cpu_features1","cpu_features2",...)))`` to
+  support Function Multi Versioning on AArch64 target. It detects at runtime
+  which function versions are supported by CPU and calls the one with highest
+  priority. Refer to `clang attributes
+  `_ for
+  more details.
+
 Windows Support
 ---
 - For the MinGW driver, added the options ``-mguard=none``, ``-mguard=cf`` and


Index: llvm/docs/ReleaseNotes.rst
===
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -133,6 +133,10 @@
 * Added support for the Neoverse V2 CPU.
 * Added support for assembly for RME MEC (Memory Encryption Contexts).
 * Added codegen support for the Armv8.3 Complex Number extension.
+* Implemented `Function Multi Versioning
+  `_
+  in accordance with Arm C Language Extensions specification. Currently in Beta
+  state.
 
 Changes to the AMDGPU Backend
 -
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -614,6 +614,15 @@
   memory placement. It emits a warning if something in the code provably prevents
   an instance from a read-only memory placement.
 
+- Introduced new attribute ``__attribute__((target_version("cpu_features")))``
+  and expanded the functionality of the existing attribute
+  ``__attribute__((target_clones("cpu_features1","cpu_features2",...)))`` to
+  support Function Multi Versioning on AArch64 target. It detects at runtime
+  which function versions are supported by CPU and calls the one with highest
+  priority. Refer to `clang attributes
+  `_ for
+  more details.
+
 Windows Support
 ---
 - For the MinGW driver, added the options ``-mguard=none``, ``-mguard=cf`` and
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2023-01-07 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4031994 , @smeenai wrote:

> You're right that it conceptually makes sense for this to be in `cpu_model.c` 
> though. An alternative would be providing an option for compiler-rt to be 
> built without the multiversioning support, e.g. if it's built with `-mno-fmv` 
> itself. Does that seem reasonable?

Sounds good to me; please take a look at the patch adding such a CMake option:
https://reviews.llvm.org/D141199
Using CMake `-DCOMPILER_RT_DISABLE_AARCH64_FMV=On` will exclude AArch64
multiversioning support from the compiler-rt build.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2023-01-06 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4031313 , @smeenai wrote:

> We can use `-mno-fmv` to avoid that dependency, right? We're interested in 
> using that for our own code (where we don't make use of function 
> multi-versioning), and want to prevent the compiler-rt support from being 
> pulled in from the archive unnecessarily. It'd still be available for users 
> who needed it.

Right, you will need to explicitly provide `-mno-fmv` then. Currently the
__aarch64_cpu_features stuff is located in builtins
(`libclang_rt.builtins-aarch64.a`). Did I understand you correctly that your
apps are linked against libclang_rt.builtins-aarch64.a, and that if we move the
function multiversioning part to a new library, let's say
`libclang_rt.cpu_features-aarch64.a`, that will resolve your concern? As a
side note, builtins/cpu_model.c contains the X86 CPU features used in function
multiversioning on that target as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2023-01-06 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4030881 , @smeenai wrote:

> We're not actually using multi-versioning anywhere, but we're still paying 
> the size cost for it as a result. Would we consider moving the newly added 
> functions into their own file (or perhaps moving the outlined atomics 
> functions into a different file), so that you can use outlined atomics 
> without also paying the size cost of function multiversioning if you don't 
> need it?

Function multiversioning expects compiler-rt to have __aarch64_cpu_features; it
will be broken if compiler-rt lacks it (see
clang/lib/Driver/ToolChains/Clang.cpp:7231). I believe function
multiversioning will be used on Android, as outline atomics already are.

> I also had a couple of general questions, since I think I'm missing something 
> obvious:
>
> - How come we need both `init_cpu_features` and `init_cpu_features_resolver`? 
> It seems like we're codegenning calls to the latter where needed, so I'm not 
> sure what the former is adding on top.
> - From what I can see, we're codegenning calls to 
> `init_cpu_features_resolver` without any arguments, but the actual definition 
> of that function has two arguments. How does that work?

hwcaps are ABI-specified arguments of the ifunc resolver. The constructor
init_cpu_features calls getauxval to initialize the hwcaps and then passes them
explicitly to init_cpu_features_resolver. If the resolver is called before the
constructor, init_cpu_features_resolver gets hwcap and hwcap2 as arguments from
the dynamic loader.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-28 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4018490 , @mceier wrote:

> Checked the changes I'm suggesting and they fix the standalone build.

Thanks! Fix committed 2184fcf17ee00a939b3bde98a28ef586c67d6b1a 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-22 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4012283 , @hctim wrote:

> I'm not sure "MSan didn't handle correctly SmallVector" is the case. Given 
> your diagnosis of 3-elements-vs-2, I'm guessing the root cause is that 
> `clang/lib/Sema/SemaDecl.cpp:11369` is wrong:
>
>   !std::equal(CurClones->featuresStrs_begin(),
>   CurClones->featuresStrs_end(),
>   NewClones->featuresStrs_begin( {
>
> This construction of `std::equal` is very error-prone, as if 
> `NewClones.size() < CurClones.size()`, then this invariable leads to 
> buffer-overflow. I'm wondering if that's the underlying cause, it would seem 
> entirely possible that expanding the in-place elements are always 
> "initialized" from MSan's perspective and so the current code has a 
> false-negative, and your new code made it so that the vector is now 
> heap-based, which is revealing the underlying issue. Maybe worth trying one 
> more thing and adding an `assert(CurClones->size() <= NewClones->size());` to 
> double check?

I don't think `std::equal` is the underlying cause here. We have a
featuresStrs_size() comparison before calling it:

  if (CurClones && NewClones &&
      (CurClones->featuresStrs_size() != NewClones->featuresStrs_size() ||
       !std::equal(CurClones->featuresStrs_begin(),
                   CurClones->featuresStrs_end(),
                   NewClones->featuresStrs_begin()))) {

Also, even if we completely remove std::equal, the use-of-uninitialized-value 
error still persists.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-21 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

I've managed to reproduce "MemorySanitizer: use-of-uninitialized-value" error 
locally, thank you @hctim for help!
If I understand it right, it seems **MSan didn't handle SmallVector correctly** 
- a variable-sized array with some number of elements stored in-place and heap 
allocation for additional elements if needed:

  clang/lib/Sema/SemaDeclAttr.cpp:3615 SmallVector<SmallString<64>, 2> 
StringsBuffer;

StringsBuffer had 2 in-place elements, and the tests which require 3 
failed with the MSan use-of-uninitialized-value error. 
With the number of in-place StringsBuffer elements set to 3

  SmallVector<SmallString<64>, 3> StringsBuffer;

all use-of-uninitialized-value errors are gone.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-21 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4011437 , @hctim wrote:

> 2. Build a sanitizer libcxx.
>
>   $ cd /tmp/2
>   $ cmake \
>   -DCMAKE_C_COMPILER=/tmp/1/bin/clang \
>   -DCMAKE_CXX_COMPILER=/tmp/1/bin/clang++ \
>   -GNinja \
>   -DLLVM_USE_SANITIZER=Memory \
>   -DCMAKE_BUILD_TYPE=Release \
>   -DLLVM_ENABLE_ASSERTIONS=ON \
>   -DLLVM_ENABLE_RUNTIMES="'libcxx;libcxxabi'" \
>   -DLLVM_USE_LINKER="'lld'" \
>   /path/to/llvm/runtimes/ # < Make sure this is *runtimes*, not llvm.
>   $ ninja cxx cxxabi

Thank you for the help! On the second step, pointing to llvm-project/llvm/runtimes/, I 
got:

  CMake Error at CMakeLists.txt:29 (include):   

include could not find requested file:  



  LLVMExternalProjectUtils 
  ...
  CMake Error at CMakeLists.txt:379 (if):
if given arguments:
  
  "openmp" "IN_LIST" "LLVM_ENABLE_RUNTIMES"
  
Unknown arguments specified

   


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-21 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4010993 , @hctim wrote:

> Hmm, not exactly sure what's going on with the `could NOT find Threads` 
> there. A quick googling seems to point to pthreads.so not being in the right 
> places, but I don't think the buildbot does anything special. Do your regular 
> builds with `-DLLVM_ENABLE_PROJECTS="compiler_rt;clang;lld"` work?
>
> When you say that it looks fine locally, is that from your own checkout but 
> using `-DLLVM_USE_SANITIZER=Memory`? First thing to check is that you do end 
> up with MSan in the test (in particular the clang binary that's being 
> produced), which you can do by `nm bin/clang-16 | grep __msan_init`.

Regular builds work fine for me; pthreads is located here: 
"/lib/x86_64-linux-gnu/libpthread.so" 
"/usr/lib/x86_64-linux-gnu/libpthread.so". Enabling 
"-DLLVM_USE_SANITIZER=Memory" resulted in many "WARNING: MemorySanitizer: 
use-of-uninitialized-value" reports from tblgen, like:

  cd /data/ReleasesToCommit/llvm-project/build && 
/data/ReleasesToCommit/llvm-project/build/bin/llvm-tblgen -gen-intrinsic-enums 
-intrinsic-prefix=s390 -I 
/data/ReleasesToCommit/llvm-project/llvm/include/llvm/IR 
-I/data/ReleasesToCommit/llvm-project/build/include 
-I/data/ReleasesToCommit/llvm-project/llvm/include 
/data/ReleasesToCommit/llvm-project/llvm/include/llvm/IR/Intrinsics.td 
--write-if-changed -o include/llvm/IR/IntrinsicsS390.h -d 
include/llvm/IR/IntrinsicsS390.h.d
  [build] ==2441251==WARNING: MemorySanitizer: use-of-uninitialized-value


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-21 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.



In D127812#4009577 , @hctim wrote:

> In D127812#4009447 , @hctim wrote:
>
>> Hi, this looks like a candidate for breaking the MSan bot: 
>> https://lab.llvm.org/buildbot/#/builders/5/builds/30139
>>
>> Still looking into it and bisecting, will let you know when I have more 
>> info. To reproduce the bots, the best way (because MSan setup is tricky 
>> because it requires an instrumented libcxx) is to use the scripts from 
>> https://github.com/google/sanitizers/wiki/SanitizerBotReproduceBuild 
>> (buildbot_fast.sh is the right one).
>
> Yeah, unfortunately I did track this failure down to this commit and reverted 
> it upstream. If you need help figuring it out, please let me know. You may 
> find that adding `-fsanitize-memory-track-origins=2` useful to add to the 
> buildscript as well (which can be done by changing `check_stage2_msan` to 
> `check_stage2_msan_track_origins` in `buildbot_fast.sh`.

It would be great to have more details on how to set up your bot; using 
buildbot_fast.sh on x86_64 Ubuntu 22.04 LTS leads to an error (pthreads is 
installed):

  CMake Error at 
/usr/share/cmake-3.22/Modules/FindPackageHandleStandardArgs.cmake:230 (message):
Could NOT find Threads (missing: Threads_FOUND)
  Call Stack (most recent call first):
/usr/share/cmake-3.22/Modules/FindPackageHandleStandardArgs.cmake:594 
(_FPHSA_FAILURE_MESSAGE)
/usr/share/cmake-3.22/Modules/FindThreads.cmake:238 
(FIND_PACKAGE_HANDLE_STANDARD_ARGS)
cmake/config-ix.cmake:114 (find_package)
CMakeLists.txt:776 (include)

Also, the MemorySanitizer: use-of-uninitialized-value cases from 
https://lab.llvm.org/buildbot/#/builders/5/builds/30139 look fine locally, all 
values are initialized. Could MSan produce false positive results?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-20 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4009014 , @paulkirth wrote:

> Hi, thanks for the fix. That unblocked our builder. Unfortunately, we still 
> see some errors in tests.
>
>   FAIL: Clang :: Driver/aarch64-features.c (7460 of 16622)
>    TEST 'Clang :: Driver/aarch64-features.c' FAILED 
> 
>   Script:
>   --
>   : 'RUN: at line 1';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64-none-linux-gnu -### 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 
> -fsyntax-only 2>&1 | /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 2';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> arm64-none-linux-gnu -### 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 
> -fsyntax-only 2>&1 | /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 10';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64-linux-android -rtlib=compiler-rt  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck -check-prefix=CHECK-FMV 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 13';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64-linux-android -rtlib=compiler-rt -mno-fmv  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck -check-prefix=CHECK-FMV-OFF 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 16';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64-linux-gnu  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck -check-prefix=CHECK-FMV-OFF 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 19';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> arm64-unknown-linux -rtlib=libgcc  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck -check-prefix=CHECK-FMV-OFF 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 26';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64-linux-android -rtlib=compiler-rt  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck 
> -check-prefix=CHECK-OUTLINE-ATOMICS-ON 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 29';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64-linux-gnu -rtlib=compiler-rt  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck 
> -check-prefix=CHECK-OUTLINE-ATOMICS-ON 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 32';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> arm64-unknown-linux -rtlib=compiler-rt  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck 
> -check-prefix=CHECK-OUTLINE-ATOMICS-ON 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 35';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64--none-eabi -rtlib=compiler-rt  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck 
> -check-prefix=CHECK-OUTLINE-ATOMICS-OFF 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 38';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64-apple-darwin -rtlib=compiler-rt  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck 
> -check-prefix=CHECK-OUTLINE-ATOMICS-OFF 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 41';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64-windows-gnu -rtlib=compiler-rt  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck 
> -check-prefix=CHECK-OUTLINE-ATOMICS-OFF 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c
>   : 'RUN: at line 44';   /b/s/w/ir/x/w/staging/llvm_build/bin/clang -target 
> aarch64-unknown-openbsd -rtlib=compiler-rt  -### -c 
> /b/s/w/ir/x/w/llvm-llvm-project/clang/test/Driver/aarch64-features.c 2>&1 | 
> /b/s/w/ir/x/w/staging/llvm_build/bin/FileCheck 
> -check-prefix=CHECK-OUTLINE-ATOMICS-OFF 
> 

[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-20 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#4008451 , @paulkirth wrote:

> Hi, we're seeing a build failure in Fuchsia's Clang CI. We're seeing this on 
> all of our builders: arm64 & x64 linux, mac and windows
>
>   FAILED: CMakeFiles/clang_rt.builtins-aarch64.dir/cpu_model.c.o 
>   /b/s/w/ir/x/w/recipe_cleanup/cxx-rbevgw5lbzc/reclient-cxx-wrapper.sh 
> /b/s/w/ir/x/w/staging/llvm_build/./bin/clang 
> --target=aarch64-unknown-linux-gnu --sysroot=/b/s/w/ir/x/w/cipd/linux 
> -DHAS_ASM_LSE -DVISIBILITY_HIDDEN  --target=aarch64-unknown-linux-gnu -O2 -g 
> -DNDEBUG -DCOMPILER_RT_HAS_FLOAT16 -std=c11 -fPIC -fno-builtin 
> -fvisibility=hidden -fomit-frame-pointer -MD -MT 
> CMakeFiles/clang_rt.builtins-aarch64.dir/cpu_model.c.o -MF 
> CMakeFiles/clang_rt.builtins-aarch64.dir/cpu_model.c.o.d -o 
> CMakeFiles/clang_rt.builtins-aarch64.dir/cpu_model.c.o -c 
> /b/s/w/ir/x/w/llvm-llvm-project/compiler-rt/lib/builtins/cpu_model.c
>   ../../../../llvm-llvm-project/compiler-rt/lib/builtins/cpu_model.c:1233:15: 
> error: use of undeclared identifier 'HWCAP_CPUID'
> if (hwcap & HWCAP_CPUID) {
> ^
>   1 error generated.
>
> Bot: 
> https://ci.chromium.org/ui/p/fuchsia/builders/toolchain.ci/clang-linux-x64-rbe/b8794244402291698129/overview
>
> Can you revert if the fix is going to be hard. I imagine in this case the 
> preprocessor logic is slightly off, so a forward fix may be easy.

Thanks for the report, fixed in a43f36142c501e2d3f4797ef938db4e0c5e0eeec.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-19 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

Does anyone have any more objections? I'm going to merge it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-01 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: llvm/include/llvm/Support/AArch64TargetParser.def:115
+AARCH64_ARCH_EXT_NAME("rdm",   AArch64::AEK_RDM, "+rdm",   
   "-rdm", \
+RDM,  "+rdm,+fp-armv8,+neon,+jsconv,+complxnum",   
 70)
+AARCH64_ARCH_EXT_NAME("crypto",AArch64::AEK_CRYPTO,  "+crypto",
   "-crypto",  \

dmgreen wrote:
> ilinpv wrote:
> > dmgreen wrote:
> > > Should RDM be dependant on +jsconv,+complxnum?
> > I suppose RDM implies simd (+neon) which implies +jsconv,+complxnum
> jsconv and complxnum are 8.3 features, are they not? Or do they somehow mean 
> something different here? It would seem strange for neon to depend on them.
Right, the last implication is true from 8.3 and wrong before that, thanks for 
spotting that! I will fix that in other places too.

```
FEAT_JSCVT implements the functionality identified by 0b0001.
In Armv8.0, Armv8.1, and Armv8.2, the only permitted value is 0b0000.
From Armv8.3, if Advanced SIMD or Floating-point is implemented, the only 
permitted value is 0b0001.
From Armv8.3, if Advanced SIMD or Floating-point is not implemented, the only 
permitted value is 0b0000.

```





Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-01 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: llvm/include/llvm/Support/AArch64TargetParser.def:115
+AARCH64_ARCH_EXT_NAME("rdm",   AArch64::AEK_RDM, "+rdm",   
   "-rdm", \
+RDM,  "+rdm,+fp-armv8,+neon,+jsconv,+complxnum",   
 70)
+AARCH64_ARCH_EXT_NAME("crypto",AArch64::AEK_CRYPTO,  "+crypto",
   "-crypto",  \

dmgreen wrote:
> Should RDM be dependant on +jsconv,+complxnum?
I suppose RDM implies simd (+neon) which implies +jsconv,+complxnum


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] FMV support and necessary target features dependencies.

2022-12-01 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv marked an inline comment as done.
ilinpv added inline comments.



Comment at: compiler-rt/lib/builtins/cpu_model.c:1311
+  // CPU features already initialized.
+  if (__aarch64_cpu_features.features)
+return;

danielkiss wrote:
> I'd add a init value for the declaration to be sure it is properly 
> initialised. 
I rely on __aarch64_cpu_features, which as a global is guaranteed to be 
initialised to 0.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D129802: [DRAFT] Implementing new atomic orderings in LLVM and generate barriers for legacy __sync builtins. Support corresponding memory model in outline atomics as well.

2022-09-26 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

@sebpop could you elaborate on the usage of __sync_* operations: are you seeing 
issues with the current Clang implementation? Does Clang need to keep supporting 
them and fix them by introducing a new memory model? It seems we need compelling 
reasons to do that.

>> However if sync primitives are hardly used in the real world then perhaps it 
>> is about time to deprecate them with annoying warnings, and completely 
>> remove support next year. Does that sound reasonable?
>
> I don't know that they are "hardly used" in the real world -- there is 
> certainly legacy code using them, although I haven't attempted to quantify 
> the amount. Another interesting question would be: how much of that code is 
> actually correct, at all. I'd be rather surprised if someone could identify a 
> single piece of software which is actually correct when using the __sync_* 
> operations on GCC, but is incorrect on Clang.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D129802/new/

https://reviews.llvm.org/D129802

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] Function multiversioning support added.

2022-07-14 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64.td:74
 
+def FeatureFMV : SubtargetFeature<"fmv", "HasFMV", "true",
+  "Enable Function Multi Versioning support.">;

echristo wrote:
> What is this for?
FMV is a target feature which is enabled by default and can be disabled 
(-mno-fmv). According to the ACLE spec: "FMV may be disabled in compile time by a 
compiler flag. In this case the default version shall be used."


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] Function multiversioning support added.

2022-06-20 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: clang/docs/ClangCommandLineReference.rst:1086
 
+.. option:: -mno-fmv
+

xbolva00 wrote:
> ilinpv wrote:
> > MaskRay wrote:
> > > This file is auto-generated. Don't touch it.
> > It looked out of sync with options td files:
> > 
> > ```
> > +.. option:: -gen-reproducer=, -fno-crash-diagnostics (equivalent to 
> > -gen-reproducer=off)
> > +
> > +Emit reproducer on (option: off, crash (default), error, always)
> > +
> > 
> > +.. option:: -print-diagnostic-options, --print-diagnostic-options
> > +
> > +Print all of Clang's warning options
> > +
> > 
> > +.. option:: -fdriver-only
> > +
> > +Only run the driver.
> > +
> > 
> > ...
> > ```
> But it needs to be manually autogenerated..
Done https://reviews.llvm.org/D128116, thanks to @MaskRay 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D128116: [clang][docs] Sync generated command line doc with td files.

2022-06-20 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv closed this revision.
ilinpv added a comment.

Committed in af6d2a0b6825e71965f3e2701a63c239fa0ad70f.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128116/new/

https://reviews.llvm.org/D128116

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D128116: [clang][docs] Sync generated command line doc with td files.

2022-06-18 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added a reviewer: MaskRay.
Herald added a subscriber: StephenFan.
Herald added a project: All.
ilinpv requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

It seems the auto-generated ClangCommandLineReference.rst is out of sync with the 
options td files.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D128116

Files:
  clang/docs/ClangCommandLineReference.rst

Index: clang/docs/ClangCommandLineReference.rst
===
--- clang/docs/ClangCommandLineReference.rst
+++ clang/docs/ClangCommandLineReference.rst
@@ -281,6 +281,10 @@
 
 Emit type record hashes in a .debug$H section
 
+.. option:: -gen-reproducer=, -fno-crash-diagnostics (equivalent to -gen-reproducer=off)
+
+Emit reproducer on (option: off, crash (default), error, always)
+
 .. option:: --gpu-instrument-lib=
 
 Instrument device library for HIP, which is a LLVM bitcode containing \_\_cyg\_profile\_func\_enter and \_\_cyg\_profile\_func\_exit
@@ -339,7 +343,7 @@
 
 .. option:: -mharden-sls=
 
-Select straight-line speculation hardening scope
+Select straight-line speculation hardening scope (ARM/AArch64/X86 only).  must be: all, none, retbr(ARM/AArch64), blr(ARM/AArch64), comdat(ARM/AArch64), nocomdat(ARM/AArch64), return(X86), indirect-jmp(X86)
 
 .. option:: --migrate
 
@@ -551,6 +555,10 @@
 
 .. option:: --print-diagnostic-categories
 
+.. option:: -print-diagnostic-options, --print-diagnostic-options
+
+Print all of Clang's warning options
+
 .. option:: -print-effective-triple, --print-effective-triple
 
 Print the effective target triple
@@ -817,6 +825,10 @@
 
 Extract API information
 
+.. option:: -fdriver-only
+
+Only run the driver.
+
 .. option:: -fsyntax-only
 
 .. option:: -module-file-info
@@ -929,10 +941,6 @@
 
 Inline functions which are (explicitly or implicitly) marked inline
 
-.. option:: -fno-crash-diagnostics
-
-Disable auto-generation of preprocessed source files and a script for reproduction during a clang crash
-
 .. option:: -fno-legacy-pass-manager, -fexperimental-new-pass-manager
 
 .. option:: -fno-sanitize-ignorelist, -fno-sanitize-blacklist
@@ -1106,7 +1114,9 @@
 
 Include comments from within macros in preprocessed output
 
-.. option:: -D=, --define-macro , --define-macro=
+.. program:: clang2
+.. option:: -D=, --D, /D, -D, --define-macro , --define-macro=
+.. program:: clang
 
 Define  to  (or 1 if  omitted)
 
@@ -1727,6 +1737,10 @@
 
 Emit all declarations, even if unused
 
+.. option:: -femit-dwarf-unwind=
+
+When to emit DWARF unwind (EH frame) info.  must be 'always', 'no-compact-unwind' or 'default'.
+
 .. option:: -femulated-tls, -fno-emulated-tls
 
 Use emutls functions to access thread\_local variables
@@ -1903,6 +1917,10 @@
 
 Run cc1 in-process
 
+.. option:: -fintegrated-objemitter, -fno-integrated-objemitter
+
+Use internal machine object code emitter.
+
 .. option:: -fjmc, -fno-jmc
 
 Enable just-my-code debugging
@@ -2767,7 +2785,7 @@
 
 .. option:: -fzero-call-used-regs=
 
-Clear call-used registers upon function return.  must be 'skip', 'used-gpr-arg', 'used-gpr', 'used-arg', 'used', 'all-gpr-arg', 'all-gpr', 'all-arg' or 'all'.
+Clear call-used registers upon function return (AArch64/x86 only).  must be 'skip', 'used-gpr-arg', 'used-gpr', 'used-arg', 'used', 'all-gpr-arg', 'all-gpr', 'all-arg' or 'all'.
 
 .. option:: -fzero-initialized-in-bss, -fno-zero-initialized-in-bss
 
@@ -2795,6 +2813,10 @@
 
 OpenCL only. Allow denormals to be flushed to zero.
 
+.. option:: -cl-ext=,...
+
+OpenCL only. Enable or disable OpenCL extensions/optional features. The argument is a comma-separated sequence of one or more extension names, each prefixed by '+' or '-'.
+
 .. option:: -cl-fast-relaxed-math
 
 OpenCL only. Sets -cl-finite-math-only and -cl-unsafe-math-optimizations, and defines \_\_FAST\_RELAXED\_MATH\_\_.
@@ -3073,6 +3095,10 @@
 
 .. option:: -mdefault-build-attributes, -mno-default-build-attributes
 
+.. option:: -mdefault-visibility-export-mapping=
+
+Mapping between default visibility and export.  must be 'none', 'explicit' or 'all'.
+
 .. option:: -mdll
 
 .. option:: -mdouble= to the linker
 
-.. option:: --offload-link
-
-Use the linker supporting offloading device linking.
-
 .. option:: -X
 
 .. option:: -Xlinker , --for-linker , --for-linker=
 
 Pass  to the linker
 
-.. option:: -Xoffload-linker , -Xoffload-linker- 
+.. option:: -Xoffload-linker 
 
-Pass  to all the device linking jobs, or for only  if specified.
+Pass  to the offload linkers or the ones idenfied by -
 
 .. program:: clang1
 .. option:: -Z
@@ -4255,6 +4277,10 @@
 .. option:: -nostdlib, --no-standard-libraries
 .. program:: clang
 
+.. option:: --offload-link
+
+Use the new offloading linker to perform the link job.
+
 .. option:: -pie
 
 .. option:: -r
@@ -4295,15 +4321,21 @@
 ===
 dxc compatibility options
 
-.. program:: clang2
+.. program:: clang3
 

[PATCH] D127812: [AArch64] Function multiversioning support added.

2022-06-15 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: clang/include/clang/AST/ASTContext.h:3090
 
+  std::vector<std::string>
+  filterFunctionTargetVersionAttrs(const TargetVersionAttr *TV) const;

erichkeane wrote:
> It is concerning that this differs from the above.  
target_version supports features only, which is similar to ParsedTargetAttr { 
std::vector<std::string> Features; }.




Comment at: clang/include/clang/Basic/DiagnosticSemaKinds.td:11483
+def warn_target_clone_no_impact_options
+: Warning<"version list contains no code impact entries">,
+  InGroup<FunctionMultiVersioning>;

erichkeane wrote:
> I'm not clear as to what this means?
It gives a warning if the target_clones attribute contains features which have no 
impact on code generation (not supported yet) and are ignored. They have a "" 
OPTION in llvm/include/llvm/Support/AArch64TargetParser.def. 
See the clang/test/Sema/attr-target-clones-aarch64.c tests:
```
// expected-warning@+1 {{version list contains no code impact entries}}
void __attribute__((target_clones("sha1+pmull"))) warn2(void);

// expected-warning@+1 {{version list contains no code impact entries}}
int __attribute__((target_clones("rng", "fhm+dpb+sha1", "default"))) 
redecl4(void) { return 1; }
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] Function multiversioning support added.

2022-06-15 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: clang/docs/ClangCommandLineReference.rst:1086
 
+.. option:: -mno-fmv
+

MaskRay wrote:
> This file is auto-generated. Don't touch it.
It looked out of sync with options td files:

```
+.. option:: -gen-reproducer=, -fno-crash-diagnostics (equivalent to 
-gen-reproducer=off)
+
+Emit reproducer on (option: off, crash (default), error, always)
+

+.. option:: -print-diagnostic-options, --print-diagnostic-options
+
+Print all of Clang's warning options
+

+.. option:: -fdriver-only
+
+Only run the driver.
+

...
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127812: [AArch64] Function multiversioning support added.

2022-06-15 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

In D127812#3585249 , @erichkeane 
wrote:

> I'm concerned as to the design of this addition, I don't particularly 
> appreciate the reasons for making 'target_clones' different, nor the purpose 
> for adding a new attribute instead of using 'target' for what seems like 
> exactly that?  IF the new spelling is THAT necessary, we perhaps don't need a 
> whole new attribute for it either.

Thank you for the fair concern. "target_clones" for AArch64 has a different format 
and semantics, e.g. "default" is not required. Therefore it diverges from X86 in 
these parts. The "target" attribute is already used and supported on AArch64 
in a different sense, like target("arm"), target("dotprod"), 
target("branch-protection=bti"). The intention of creating the new "target_version" 
attribute is not to overlap with that. It also has a different format, mangling 
and semantics, e.g. treating a function without the attribute as "default", and an 
option to disable the attribute, dropping function multi versions.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127812/new/

https://reviews.llvm.org/D127812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91157: [AArch64] Out-of-line atomics (-moutline-atomics) implementation.

2022-05-17 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

I think it looks reasonable to define a 5th memory model, and to add barriers to 
__sync_* builtins and to outline-atomics calls as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91157/new/

https://reviews.llvm.org/D91157

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97510: [AArch64][Docs] Release notes 12.x on outline atomics

2021-03-01 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv closed this revision.
ilinpv added a comment.

Tim, thank you for the review, the noticed typos and overall support of the 
outline atomics patches!
Notes pushed in rG98f06b16a313ece593f5711778d7da9037f3a2ef.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97510/new/

https://reviews.llvm.org/D97510

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97510: [AArch64][Docs] Release notes 12.x on outline atomics

2021-02-25 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 326550.
ilinpv added a comment.

Fixing typos


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97510/new/

https://reviews.llvm.org/D97510

Files:
  clang/docs/ReleaseNotes.rst


Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -73,6 +73,15 @@
 
 - ...
 
+- AArch64 options ``-moutline-atomics``, ``-mno-outline-atomics`` to enable
+  and disable calls to helper functions implementing atomic operations. These
+  out-of-line helpers like '__aarch64_cas8_relax' will detect at runtime
+  AArch64 Large System Extensions (LSE) availability and either use their
+  atomic instructions, or fall back to LL/SC loop. These options do not apply,
+  if compilation target supports LSE. Atomic instructions are used directly in
+  that case. The options behaviour mirrors GCC, the helpers are implemented
+  both in compiler-rt and libgcc.
+
 - -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo
   for contents of a precompiled header in a separate object file. This object
   file needs to be linked in, but its contents do not need to be generated


Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -73,6 +73,15 @@
 
 - ...
 
+- AArch64 options ``-moutline-atomics``, ``-mno-outline-atomics`` to enable
+  and disable calls to helper functions implementing atomic operations. These
+  out-of-line helpers like '__aarch64_cas8_relax' will detect at runtime
+  AArch64 Large System Extensions (LSE) availability and either use their
+  atomic instructions, or fall back to LL/SC loop. These options do not apply,
+  if compilation target supports LSE. Atomic instructions are used directly in
+  that case. The options behaviour mirrors GCC, the helpers are implemented
+  both in compiler-rt and libgcc.
+
 - -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo
   for contents of a precompiled header in a separate object file. This object
   file needs to be linked in, but its contents do not need to be generated
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97510: [AArch64][Docs] Release notes 12.x on outline atomics

2021-02-25 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added reviewers: willlovett, ktkachov.
Herald added subscribers: danielkiss, jfb, kristof.beyls.
ilinpv requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Description for AArch64 -moutline-atomics, -mno-outline-atomics
options added to release notes.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97510

Files:
  clang/docs/ReleaseNotes.rst


Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -73,6 +73,15 @@
 
 - ...
 
+- AArch64 options ``-moutline-atomics``, ``-mno-outline-atomics`` to enable
+  and disable calls to helper functions implementing atomic operations. These
+  out-of-line helpers like '__aarch64_cas8_relax' will detect at runtime
+  AArch64 Large System Extensions (LSE) availability and either use their
+  atomic instructions, or falls back to LL/SC loop. These options do not apply,
+  if compilation target supports LSE. Atomic instruction are used directly in
+  that case. The options behaviour mirrors GCC, the helpers are implemented
+  both in compiler-rt and libgcc.
+
 - -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo
   for contents of a precompiled header in a separate object file. This object
   file needs to be linked in, but its contents do not need to be generated


Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -73,6 +73,15 @@
 
 - ...
 
+- AArch64 options ``-moutline-atomics``, ``-mno-outline-atomics`` to enable
+  and disable calls to helper functions implementing atomic operations. These
+  out-of-line helpers like '__aarch64_cas8_relax' will detect at runtime
+  AArch64 Large System Extensions (LSE) availability and either use their
+  atomic instructions, or falls back to LL/SC loop. These options do not apply,
+  if compilation target supports LSE. Atomic instruction are used directly in
+  that case. The options behaviour mirrors GCC, the helpers are implemented
+  both in compiler-rt and libgcc.
+
 - -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo
   for contents of a precompiled header in a separate object file. This object
   file needs to be linked in, but its contents do not need to be generated
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93585: [AArch64][Clang][Linux] Enable out-of-line atomics by default.

2021-01-29 Thread Pavel Iliin via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc5e7e649d537: [AArch64][Clang][Linux] Enable out-of-line 
atomics by default. (authored by ilinpv).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

Files:
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Linux.cpp
  clang/lib/Driver/ToolChains/Linux.h
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-10/lib/gcc/aarch64-unknown-linux-gnu/10/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-10/lib/gcc/aarch64-unknown-linux-gnu/10/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0/lib/gcc/aarch64-unknown-linux-gnu/7.5.0/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0/lib/gcc/aarch64-unknown-linux-gnu/7.5.0/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0/lib/gcc/aarch64-unknown-linux-gnu/9.3.0/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0/lib/gcc/aarch64-unknown-linux-gnu/9.3.0/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1/lib/gcc/aarch64-unknown-linux-gnu/9.3.1/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1/lib/gcc/aarch64-unknown-linux-gnu/9.3.1/libgcc.a
  clang/test/Driver/aarch64-features.c

Index: clang/test/Driver/aarch64-features.c
===
--- clang/test/Driver/aarch64-features.c
+++ clang/test/Driver/aarch64-features.c
@@ -6,3 +6,60 @@
 // The AArch64 PCS states that chars should be unsigned.
 // CHECK: fno-signed-char
 
+// Check for AArch64 out-of-line atomics default settings.
+// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target arm64-unknown-linux -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64--none-eabi -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-apple-darwin -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-windows-gnu -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-unknown-openbsd -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-10 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target arm64-linux -rtlib=compiler-rt -mno-outline-atomics \
+// RUN: -### -c %s 2>&1 | FileCheck \
+// RUN: -check-prefixes=CHECK-OUTLINE-ATOMICS-OFF,CHECK-NO-OUTLINE-ATOMICS %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc -mno-outline-atomics \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-10 \
+// RUN: -### -c %s 2>&1 | FileCheck \
+// RUN: -check-prefixes=CHECK-OUTLINE-ATOMICS-OFF,CHECK-NO-OUTLINE-ATOMICS %s
+
+// RUN: %clang -target aarch64-apple-darwin -rtlib=compiler-rt -moutline-atomics \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64-windows-gnu -rtlib=libgcc -moutline-atomics \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// CHECK-OUTLINE-ATOMICS-ON: "-target-feature" "+outline-atomics"
+// CHECK-OUTLINE-ATOMICS-OFF-NOT: "-target-feature" "+outline-atomics"
+// CHECK-NO-OUTLINE-ATOMICS: "-target-feature" "-outline-atomics"
Index: clang/lib/Driver/ToolChains/Linux.h
===
--- clang/lib/Driver/ToolChains/Linux.h
+++ clang/lib/Driver/ToolChains/Linux.h
@@ -36,6 +36,8 @@
   void 

[PATCH] D93585: [AArch64][Clang][Linux] Enable out-of-line atomics by default.

2021-01-29 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 320142.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

Files:
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Linux.cpp
  clang/lib/Driver/ToolChains/Linux.h
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-10/lib/gcc/aarch64-unknown-linux-gnu/10/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-10/lib/gcc/aarch64-unknown-linux-gnu/10/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0/lib/gcc/aarch64-unknown-linux-gnu/7.5.0/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0/lib/gcc/aarch64-unknown-linux-gnu/7.5.0/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0/lib/gcc/aarch64-unknown-linux-gnu/9.3.0/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0/lib/gcc/aarch64-unknown-linux-gnu/9.3.0/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1/lib/gcc/aarch64-unknown-linux-gnu/9.3.1/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1/lib/gcc/aarch64-unknown-linux-gnu/9.3.1/libgcc.a
  clang/test/Driver/aarch64-features.c

Index: clang/test/Driver/aarch64-features.c
===
--- clang/test/Driver/aarch64-features.c
+++ clang/test/Driver/aarch64-features.c
@@ -6,3 +6,60 @@
 // The AArch64 PCS states that chars should be unsigned.
 // CHECK: fno-signed-char
 
+// Check for AArch64 out-of-line atomics default settings.
+// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target arm64-unknown-linux -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64--none-eabi -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-apple-darwin -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-windows-gnu -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-unknown-openbsd -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-10 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+
+// RUN: %clang -target arm64-linux -rtlib=compiler-rt -mno-outline-atomics \
+// RUN: -### -c %s 2>&1 | FileCheck \
+// RUN: -check-prefixes=CHECK-OUTLINE-ATOMICS-OFF,CHECK-NO-OUTLINE-ATOMICS %s
+
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc -mno-outline-atomics \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-10 \
+// RUN: -### -c %s 2>&1 | FileCheck \
+// RUN: -check-prefixes=CHECK-OUTLINE-ATOMICS-OFF,CHECK-NO-OUTLINE-ATOMICS %s
+
+// RUN: %clang -target aarch64-apple-darwin -rtlib=compiler-rt -moutline-atomics \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// RUN: %clang -target aarch64-windows-gnu -rtlib=libgcc -moutline-atomics \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+
+// CHECK-OUTLINE-ATOMICS-ON: "-target-feature" "+outline-atomics"
+// CHECK-OUTLINE-ATOMICS-OFF-NOT: "-target-feature" "+outline-atomics"
+// CHECK-NO-OUTLINE-ATOMICS: "-target-feature" "-outline-atomics"
Index: clang/lib/Driver/ToolChains/Linux.h
===
--- clang/lib/Driver/ToolChains/Linux.h
+++ clang/lib/Driver/ToolChains/Linux.h
@@ -36,6 +36,8 @@
   void AddIAMCUIncludeArgs(const llvm::opt::ArgList ,
llvm::opt::ArgStringList ) const override;
   CXXStdlibType GetDefaultCXXStdlibType() const override;
+  bool
+  IsAArch64OutlineAtomicsDefault(const 

[PATCH] D93585: [AArch64][Clang][Linux] Enable out-of-line atomics by default.

2021-01-29 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 320123.
ilinpv added a comment.

Tests for "-m[no]outline-atomics" options added.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

Files:
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Linux.cpp
  clang/lib/Driver/ToolChains/Linux.h
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-10/lib/gcc/aarch64-unknown-linux-gnu/10/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-10/lib/gcc/aarch64-unknown-linux-gnu/10/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0/lib/gcc/aarch64-unknown-linux-gnu/7.5.0/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0/lib/gcc/aarch64-unknown-linux-gnu/7.5.0/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0/lib/gcc/aarch64-unknown-linux-gnu/9.3.0/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0/lib/gcc/aarch64-unknown-linux-gnu/9.3.0/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1/lib/gcc/aarch64-unknown-linux-gnu/9.3.1/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1/lib/gcc/aarch64-unknown-linux-gnu/9.3.1/libgcc.a
  clang/test/Driver/aarch64-features.c

Index: clang/test/Driver/aarch64-features.c
===
--- clang/test/Driver/aarch64-features.c
+++ clang/test/Driver/aarch64-features.c
@@ -6,3 +6,42 @@
 // The AArch64 PCS states that chars should be unsigned.
 // CHECK: fno-signed-char
 
+// Check for AArch64 out-of-line atomics default settings.
+// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target arm64-unknown-linux -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64--none-eabi -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-apple-darwin -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-windows-gnu -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-unknown-openbsd -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-10 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target arm64-linux -rtlib=compiler-rt -mno-outline-atomics \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc -mno-outline-atomics \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-10 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-apple-darwin -rtlib=compiler-rt -moutline-atomics \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64-windows-gnu -rtlib=libgcc -moutline-atomics \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// CHECK-OUTLINE-ATOMICS-ON: "-target-feature" "+outline-atomics"
+// CHECK-OUTLINE-ATOMICS-OFF-NOT: "-target-feature" "+outline-atomics"
Index: clang/lib/Driver/ToolChains/Linux.h
===
--- clang/lib/Driver/ToolChains/Linux.h
+++ clang/lib/Driver/ToolChains/Linux.h
@@ -36,6 +36,8 @@
   void AddIAMCUIncludeArgs(const llvm::opt::ArgList ,
llvm::opt::ArgStringList ) const override;
   CXXStdlibType GetDefaultCXXStdlibType() const override;
+  bool
+  IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList ) const override;
   bool isPIEDefault() const override;
   bool 

[PATCH] D93585: [AArch64][Clang][Linux] Enable out-of-line atomics by default.

2021-01-28 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 319951.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

Files:
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Linux.cpp
  clang/lib/Driver/ToolChains/Linux.h
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-10/lib/gcc/aarch64-unknown-linux-gnu/10/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-10/lib/gcc/aarch64-unknown-linux-gnu/10/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0/lib/gcc/aarch64-unknown-linux-gnu/7.5.0/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0/lib/gcc/aarch64-unknown-linux-gnu/7.5.0/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0/lib/gcc/aarch64-unknown-linux-gnu/9.3.0/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0/lib/gcc/aarch64-unknown-linux-gnu/9.3.0/libgcc.a
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1/lib/gcc/aarch64-unknown-linux-gnu/9.3.1/crtbegin.o
  
clang/test/Driver/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1/lib/gcc/aarch64-unknown-linux-gnu/9.3.1/libgcc.a
  clang/test/Driver/aarch64-features.c

Index: clang/test/Driver/aarch64-features.c
===
--- clang/test/Driver/aarch64-features.c
+++ clang/test/Driver/aarch64-features.c
@@ -6,3 +6,32 @@
 // The AArch64 PCS states that chars should be unsigned.
 // CHECK: fno-signed-char
 
+// Check for AArch64 out-of-line atomics default settings.
+// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target arm64-unknown-linux -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64--none-eabi -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-apple-darwin -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-windows-gnu -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-unknown-openbsd -rtlib=compiler-rt \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-10 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-7.5.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-9.3.1 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=libgcc \
+// RUN: --gcc-toolchain=%S/Inputs/aarch64-linux-gnu-tree/gcc-9.3.0 \
+// RUN: -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// CHECK-OUTLINE-ATOMICS-ON: "-target-feature" "+outline-atomics"
+// CHECK-OUTLINE-ATOMICS-OFF-NOT: "-target-feature" "+outline-atomics"
Index: clang/lib/Driver/ToolChains/Linux.h
===
--- clang/lib/Driver/ToolChains/Linux.h
+++ clang/lib/Driver/ToolChains/Linux.h
@@ -36,6 +36,8 @@
   void AddIAMCUIncludeArgs(const llvm::opt::ArgList ,
llvm::opt::ArgStringList ) const override;
   CXXStdlibType GetDefaultCXXStdlibType() const override;
+  bool
+  IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList ) const override;
   bool isPIEDefault() const override;
   bool isNoExecStackDefault() const override;
   bool IsMathErrnoDefault() const override;
Index: clang/lib/Driver/ToolChains/Linux.cpp
===
--- clang/lib/Driver/ToolChains/Linux.cpp
+++ clang/lib/Driver/ToolChains/Linux.cpp
@@ -845,6 +845,19 @@
   getTriple().isMusl() || getSanitizerArgs().requiresPIE();
 }
 
+bool Linux::IsAArch64OutlineAtomicsDefault(const ArgList &Args) const {
+  // Outline atomics for AArch64 are supported by compiler-rt
+  // and libgcc since 9.3.1
+  assert(getTriple().isAArch64() && "expected AArch64 target!");
+  ToolChain::RuntimeLibType RtLib = GetRuntimeLibType(Args);
+  if (RtLib == ToolChain::RLT_CompilerRT)
+return true;
+  assert(RtLib == ToolChain::RLT_Libgcc && "unexpected runtime library type!");
+  if (GCCInstallation.getVersion().isOlderThan(9, 3, 1))
+  

[PATCH] D93585: [AArch64][Clang][Linux] Enable out-of-line atomics by default.

2021-01-27 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 319693.
ilinpv added a comment.

Clang driver tests for outline atomics were added.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

Files:
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Linux.cpp
  clang/lib/Driver/ToolChains/Linux.h
  clang/test/Driver/aarch64-features.c


Index: clang/test/Driver/aarch64-features.c
===
--- clang/test/Driver/aarch64-features.c
+++ clang/test/Driver/aarch64-features.c
@@ -6,3 +6,13 @@
 // The AArch64 PCS states that chars should be unsigned.
 // CHECK: fno-signed-char
 
+// Check for AArch64 out-of-line atomics default settings.
+// RUN: %clang -target aarch64-linux-android -rtlib=compiler-rt -### -c %s 
2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64-linux-gnu -rtlib=compiler-rt -### -c %s 2>&1 | 
FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target arm64-unknown-linux -rtlib=compiler-rt -### -c %s 2>&1 
| FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-ON %s
+// RUN: %clang -target aarch64--none-eabi -rtlib=compiler-rt -### -c %s 2>&1 | 
FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-apple-darwin -rtlib=compiler-rt -### -c %s 2>&1 
| FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-windows-gnu -rtlib=compiler-rt -### -c %s 2>&1 
| FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// RUN: %clang -target aarch64-unknown-openbsd -rtlib=compiler-rt -### -c %s 
2>&1 | FileCheck -check-prefix=CHECK-OUTLINE-ATOMICS-OFF %s
+// CHECK-OUTLINE-ATOMICS-ON: "-target-feature" "+outline-atomics"
+// CHECK-OUTLINE-ATOMICS-OFF-NOT: "-target-feature" "+outline-atomics"
Index: clang/lib/Driver/ToolChains/Linux.h
===
--- clang/lib/Driver/ToolChains/Linux.h
+++ clang/lib/Driver/ToolChains/Linux.h
@@ -36,6 +36,8 @@
   void AddIAMCUIncludeArgs(const llvm::opt::ArgList ,
llvm::opt::ArgStringList ) const override;
   CXXStdlibType GetDefaultCXXStdlibType() const override;
+  bool
+  IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList ) const 
override;
   bool isPIEDefault() const override;
   bool isNoExecStackDefault() const override;
   bool IsMathErrnoDefault() const override;
Index: clang/lib/Driver/ToolChains/Linux.cpp
===
--- clang/lib/Driver/ToolChains/Linux.cpp
+++ clang/lib/Driver/ToolChains/Linux.cpp
@@ -845,6 +845,21 @@
   getTriple().isMusl() || getSanitizerArgs().requiresPIE();
 }
 
+bool Linux::IsAArch64OutlineAtomicsDefault(const ArgList &Args) const {
+  if (!getTriple().isAArch64())
+return false;
+  // Outline atomics for AArch64 are supported by compiler-rt
+  // and libgcc since 9.3.1
+  if (GetRuntimeLibType(Args) == ToolChain::RLT_Libgcc) {
+const GCCVersion &Ver = GCCInstallation.getVersion();
+if (Ver.isOlderThan(9, 3, 1))
+  return false;
+  } else if (GetRuntimeLibType(Args) != ToolChain::RLT_CompilerRT) {
+return false;
+  }
+  return true;
+}
+
 bool Linux::isNoExecStackDefault() const {
 return getTriple().isAndroid();
 }
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6481,6 +6481,12 @@
   CmdArgs.push_back("-target-feature");
   CmdArgs.push_back("-outline-atomics");
 }
+  } else {
+if (Triple.isAArch64() &&
+getToolChain().IsAArch64OutlineAtomicsDefault(Args)) {
+  CmdArgs.push_back("-target-feature");
+  CmdArgs.push_back("+outline-atomics");
+}
   }
 
   if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig,
Index: clang/include/clang/Driver/ToolChain.h
===
--- clang/include/clang/Driver/ToolChain.h
+++ clang/include/clang/Driver/ToolChain.h
@@ -456,6 +456,12 @@
   /// by default.
   virtual bool IsUnwindTablesDefault(const llvm::opt::ArgList ) const;
 
+  /// Test whether this toolchain supports outline atomics by default.
+  virtual bool
+  IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList ) const {
+return false;
+  }
+
   /// Test whether this toolchain defaults to PIC.
   virtual bool isPICDefault() const = 0;
 


Index: clang/test/Driver/aarch64-features.c
===
--- clang/test/Driver/aarch64-features.c
+++ clang/test/Driver/aarch64-features.c
@@ -6,3 +6,13 @@
 // The AArch64 PCS states that chars should be unsigned.
 // CHECK: fno-signed-char
 
+// Check for AArch64 out-of-line atomics default settings.
+// RUN: %clang -target aarch64-linux-android 

[PATCH] D93585: [AArch64][Clang][Linux] Enable out-of-line atomics by default.

2021-01-25 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

@t.p.northover sorry for pinging you, do you have any concerns about the latest 
patch? It enables outline atomics by default on AArch64/Linux/Clang with a 
runtime library check. I am hoping to get this in before the LLVM 12 branch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93585: [AArch64][Clang][Linux] Enable out-of-line atomics by default.

2021-01-20 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 317849.
ilinpv retitled this revision from "[AArch64] Enable out-of-line atomics by 
default." to "[AArch64][Clang][Linux] Enable out-of-line atomics by default.".
ilinpv edited the summary of this revision.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

Files:
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Linux.cpp
  clang/lib/Driver/ToolChains/Linux.h


Index: clang/lib/Driver/ToolChains/Linux.h
===
--- clang/lib/Driver/ToolChains/Linux.h
+++ clang/lib/Driver/ToolChains/Linux.h
@@ -36,6 +36,8 @@
   void AddIAMCUIncludeArgs(const llvm::opt::ArgList ,
llvm::opt::ArgStringList ) const override;
   CXXStdlibType GetDefaultCXXStdlibType() const override;
+  bool
+  IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList ) const 
override;
   bool isPIEDefault() const override;
   bool isNoExecStackDefault() const override;
   bool IsMathErrnoDefault() const override;
Index: clang/lib/Driver/ToolChains/Linux.cpp
===
--- clang/lib/Driver/ToolChains/Linux.cpp
+++ clang/lib/Driver/ToolChains/Linux.cpp
@@ -836,6 +836,21 @@
   getTriple().isMusl() || getSanitizerArgs().requiresPIE();
 }
 
+bool Linux::IsAArch64OutlineAtomicsDefault(const ArgList &Args) const {
+  if (!getTriple().isAArch64())
+return false;
+  // Outline atomics for AArch64 are supported by compiler-rt
+  // and libgcc since 9.3.1
+  if (GetRuntimeLibType(Args) == ToolChain::RLT_Libgcc) {
+const GCCVersion &Ver = GCCInstallation.getVersion();
+if (Ver.isOlderThan(9, 3, 1))
+  return false;
+  } else if (GetRuntimeLibType(Args) != ToolChain::RLT_CompilerRT) {
+return false;
+  }
+  return true;
+}
+
 bool Linux::isNoExecStackDefault() const {
 return getTriple().isAndroid();
 }
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6441,6 +6441,12 @@
   CmdArgs.push_back("-target-feature");
   CmdArgs.push_back("-outline-atomics");
 }
+  } else {
+if (Triple.isAArch64() &&
+getToolChain().IsAArch64OutlineAtomicsDefault(Args)) {
+  CmdArgs.push_back("-target-feature");
+  CmdArgs.push_back("+outline-atomics");
+}
   }
 
   if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig,
Index: clang/include/clang/Driver/ToolChain.h
===
--- clang/include/clang/Driver/ToolChain.h
+++ clang/include/clang/Driver/ToolChain.h
@@ -456,6 +456,12 @@
   /// by default.
   virtual bool IsUnwindTablesDefault(const llvm::opt::ArgList ) const;
 
+  /// Test whether this toolchain supports outline atomics by default.
+  virtual bool
+  IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList ) const {
+return false;
+  }
+
   /// Test whether this toolchain defaults to PIC.
   virtual bool isPICDefault() const = 0;
 


Index: clang/lib/Driver/ToolChains/Linux.h
===
--- clang/lib/Driver/ToolChains/Linux.h
+++ clang/lib/Driver/ToolChains/Linux.h
@@ -36,6 +36,8 @@
   void AddIAMCUIncludeArgs(const llvm::opt::ArgList ,
llvm::opt::ArgStringList ) const override;
   CXXStdlibType GetDefaultCXXStdlibType() const override;
+  bool
+  IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList ) const override;
   bool isPIEDefault() const override;
   bool isNoExecStackDefault() const override;
   bool IsMathErrnoDefault() const override;
Index: clang/lib/Driver/ToolChains/Linux.cpp
===
--- clang/lib/Driver/ToolChains/Linux.cpp
+++ clang/lib/Driver/ToolChains/Linux.cpp
@@ -836,6 +836,21 @@
   getTriple().isMusl() || getSanitizerArgs().requiresPIE();
 }
 
+bool Linux::IsAArch64OutlineAtomicsDefault(const ArgList ) const {
+  if (!getTriple().isAArch64())
+return false;
+  // Outline atomics for AArch64 are supported by compiler-rt
+  // and libgcc since 9.3.1
+  if (GetRuntimeLibType(Args) == ToolChain::RLT_Libgcc) {
+const GCCVersion  = GCCInstallation.getVersion();
+if (Ver.isOlderThan(9, 3, 1))
+  return false;
+  } else if (GetRuntimeLibType(Args) != ToolChain::RLT_CompilerRT) {
+return false;
+  }
+  return true;
+}
+
 bool Linux::isNoExecStackDefault() const {
 return getTriple().isAndroid();
 }
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6441,6 +6441,12 @@
   CmdArgs.push_back("-target-feature");
   

[PATCH] D93585: [AArch64] Enable out-of-line atomics by default.

2021-01-13 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv marked 2 inline comments as done.
ilinpv added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64.td:1087
  FeatureNEON,
+ FeatureOutlineAtomics,
  FeaturePerfMon,

t.p.northover wrote:
> I think this still enables it more widely than we want. Clang overrides it 
> with `-outline-atomics`, but other front-ends don't.
Could I ask you to clarify which front-ends you meant (to check outline atomics 
support for them)?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93585: [AArch64] Enable out-of-line atomics by default.

2021-01-13 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 316471.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

Files:
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Linux.cpp
  clang/lib/Driver/ToolChains/Linux.h
  llvm/lib/Target/AArch64/AArch64.td
  llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
  llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
  llvm/test/CodeGen/AArch64/atomic-ops.ll
  llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
  llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
  llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll

Index: llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
===
--- llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
+++ llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s
-; RUN: opt -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
+; RUN: opt -S -mtriple=aarch64-- -mattr=-outline-atomics -atomic-expand %s | FileCheck %s
+; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
 
 define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
 ; CHECK-LABEL: @atomic_swap_f16(
Index: llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
===
--- llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-ios7.0 -mattr=+outline-atomics -o - %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -mattr=-outline-atomics -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
 
 define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
 ; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel
Index: llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
===
--- llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false -mattr=+outline-atomics %s -o - | FileCheck -enable-var-scope %s --check-prefix=OUTLINE-ATOMICS
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false -mattr=-outline-atomics %s -o - | FileCheck -enable-var-scope %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s --check-prefix=OUTLINE-ATOMICS
 
 define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 ; OUTLINE-ATOMICS: bl __aarch64_cas1_acq_rel
Index: llvm/test/CodeGen/AArch64/atomic-ops.ll
===
--- llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE_ATOMICS
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=-outline-atomics < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=-outline-atomics < %s | FileCheck %s --check-prefix=CHECK-REG
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=OUTLINE_ATOMICS
 
 
 ; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created
Index: llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
===
--- llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
+++ llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=-outline-atomics < %s | FileCheck %s
+; RUN: llc 

[PATCH] D93585: [AArch64] Enable out-of-line atomics by default.

2021-01-09 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 315613.
ilinpv edited the summary of this revision.
ilinpv added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

RT library detection and check for outline atomics support added to the driver.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93585/new/

https://reviews.llvm.org/D93585

Files:
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Linux.cpp
  clang/lib/Driver/ToolChains/Linux.h
  llvm/lib/Target/AArch64/AArch64.td
  llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
  llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
  llvm/test/CodeGen/AArch64/atomic-ops.ll
  llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
  llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
  llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll

Index: llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
===
--- llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
+++ llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s
-; RUN: opt -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
+; RUN: opt -S -mtriple=aarch64-- -mattr=-outline-atomics -atomic-expand %s | FileCheck %s
+; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
 
 define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
 ; CHECK-LABEL: @atomic_swap_f16(
Index: llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
===
--- llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-ios7.0 -mattr=+outline-atomics -o - %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -mattr=-outline-atomics -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
 
 define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
 ; OUTLINE-ATOMICS: bl ___aarch64_cas4_acq_rel
Index: llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
===
--- llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false -mattr=+outline-atomics %s -o - | FileCheck -enable-var-scope %s --check-prefix=OUTLINE-ATOMICS
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false -mattr=-outline-atomics %s -o - | FileCheck -enable-var-scope %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s --check-prefix=OUTLINE-ATOMICS
 
 define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 ; OUTLINE-ATOMICS: bl __aarch64_cas1_acq_rel
Index: llvm/test/CodeGen/AArch64/atomic-ops.ll
===
--- llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE_ATOMICS
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=-outline-atomics < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=-outline-atomics < %s | FileCheck %s --check-prefix=CHECK-REG
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=OUTLINE_ATOMICS
 
 
 ; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created
Index: llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
===
--- llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
+++ llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-linux-gnu 

[PATCH] D91157: [AArch64] Out-of-line atomics (-moutline-atomics) implementation.

2020-11-19 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6377
+} else {
+  CmdArgs.push_back("-target-feature");
+  CmdArgs.push_back("-outline-atomics");

jyknight wrote:
> We don't usually explicitly use negative features like this, do we? I think 
> this else clause can be omitted.
We do, -soft-float for example in Clang.cpp:2397

```
if (Value.startswith("-mhard-float")) {
  CmdArgs.push_back("-target-feature");
  CmdArgs.push_back("-soft-float");
  continue;
}
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91157/new/

https://reviews.llvm.org/D91157

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91157: [AArch64] Out-of-line atomics (-moutline-atomics) implementation.

2020-11-19 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

Have you got any further comments?




Comment at: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp:2170
+  SmallVector Ops;
+  if (TLI.getLibcallName(LC)) {
+Ops.append(Node->op_begin() + 2, Node->op_end());

t.p.northover wrote:
> jyknight wrote:
> > t.p.northover wrote:
> > > I think this is a bit of an abuse of the `LibcallName` mechanism. A 
> > > separate function in `TargetLowering` would probably be better.
> > I don't think that's odd or unusual -- we often condition libcall 
> > availability on getLibcallName != nullptr.
> > 
> > What does strike me here is the (pre-existing) code duplication between 
> > this function (DAGTypeLegalizer::ExpandAtomic) and 
> > SelectionDAGLegalize::ConvertNodeToLibcall. Not sure what's up with that...
> Fair enough. Didn't realise it was that common.
I noticed this existing duplication too, but found no proper place to put the 
common functionality from DAGTypeLegalizer and SelectionDAGLegalize.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91157/new/

https://reviews.llvm.org/D91157

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91157: [AArch64] Out-of-line atomics (-moutline-atomics) implementation.

2020-11-11 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv marked 10 inline comments as done.
ilinpv added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:15653
+//   http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
+// (2) low level libgcc and compiler-rt support implemented by:
+//   min/max outline atomics helpers

jyknight wrote:
> So, hold on -- AArch64 has umin/umax/smin/smax instructions, but libgcc and 
> compiler-rt don't have helpers for those? That seems to be a remarkably 
> unfortunate state of affairs.
> 
> Can you fix that, by implementing those functions in the compiler-rt patch, 
> and submitting the same to libgcc?
Yes, I agree; initially I was going to add min/max helpers to both compiler-rt 
and libgcc. But the point is that gcc does not generate LSE min/max at all, so 
these helpers would be untested and would bit-rot. There is no reason for the 
gcc community to support unused code. Hopefully, once the atomic maximum/minimum 
proposal is approved, I can easily add the helpers to libgcc and compiler-rt and 
enable [U]Min/[U]Max RMW atomics expansion in LLVM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91157/new/

https://reviews.llvm.org/D91157

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91157: [AArch64] Out-of-line atomics (-moutline-atomics) implementation.

2020-11-10 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added reviewers: jyknight, eli.friedman.
Herald added subscribers: llvm-commits, cfe-commits, dexonsmith, dang, 
danielkiss, jfb, hiraditya, kristof.beyls.
Herald added projects: clang, LLVM.
ilinpv requested review of this revision.

This patch implements the out-of-line atomics mechanism for LSE deployment.
Details of how it works can be found in llvm/docs/Atomics.rst.
The options -moutline-atomics and -mno-outline-atomics, which enable and
disable it, were added to the clang driver. This is the clang and llvm part of
the out-of-line atomics interface; the library part is already supported by
libgcc. Compiler-rt support is provided in a separate patch.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D91157

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  llvm/docs/Atomics.rst
  llvm/include/llvm/CodeGen/RuntimeLibcalls.h
  llvm/include/llvm/IR/RuntimeLibcalls.def
  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
  llvm/lib/CodeGen/TargetLoweringBase.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
  llvm/test/CodeGen/AArch64/arm64-atomic.ll
  llvm/test/CodeGen/AArch64/arm64_32-atomics.ll
  llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
  llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
  llvm/test/CodeGen/AArch64/atomic-ops.ll
  llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
  llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
  llvm/test/CodeGen/AArch64/cmpxchg-lse-even-regs.ll
  llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-23 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv marked an inline comment as done.
ilinpv added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:8058
+  def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
+   (AArch64duplane16 (v8f16 V128:$Rm),
+   VectorIndexH:$idx))),

ab wrote:
> ilinpv wrote:
> > ab wrote:
> > > Should this be V128_lo?  I don't think this is encodable for Rm in 
> > > V16-V31  (same in the other indexed f16 variants I think)
> > Yep, I double checked encoding, you are right. Thank you very much for 
> > this. Fixed in 4eca1c06a4a9183fcf7bb230d894617caf3cf3be
> Thanks Pavel!  I think this applies to the `AArch64dup` variants too, which 
> does entail adding `FPR16Op_lo` and `FPR16_lo` I imagine, and maybe a couple 
> more
Oops. Thanks again, fix landed cc457672e628846c20e92c6e0a82896f0d6db031


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78252/new/

https://reviews.llvm.org/D78252



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-22 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added a comment.

Patterns corrected to comply with encoding 
4eca1c06a4a9183fcf7bb230d894617caf3cf3be 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78252/new/

https://reviews.llvm.org/D78252



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-22 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv marked 2 inline comments as done.
ilinpv added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:8058
+  def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
+   (AArch64duplane16 (v8f16 V128:$Rm),
+   VectorIndexH:$idx))),

ab wrote:
> Should this be V128_lo?  I don't think this is encodable for Rm in V16-V31  
> (same in the other indexed f16 variants I think)
Yep, I double checked encoding, you are right. Thank you very much for this. 
Fixed in 4eca1c06a4a9183fcf7bb230d894617caf3cf3be


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78252/new/

https://reviews.llvm.org/D78252



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-21 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv closed this revision.
ilinpv added a comment.

Committed be881e2831735d6879ee43710f5a4d1c8d50c615 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78252/new/

https://reviews.llvm.org/D78252



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-21 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 259008.
ilinpv edited the summary of this revision.
ilinpv added a comment.

v2f32 pattern removed, test added.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78252/new/

https://reviews.llvm.org/D78252

Files:
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll

Index: llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
===
--- llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -14,8 +14,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
@@ -29,8 +28,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
@@ -43,8 +41,7 @@
 ; CHECK:   .Lt_vfma_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v1.4h, v2.4h
+; CHECK-NEXT:fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
@@ -57,8 +54,7 @@
 ; CHECK:   .Lt_vfmaq_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v1.8h, v2.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
@@ -72,8 +68,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $h2 killed $h2 def $q2
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %vecinit = insertelement <4 x half> undef, half %c, i32 0
@@ -88,8 +83,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $h2 killed $h2 def $q2
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %vecinit = insertelement <8 x half> undef, half %c, i32 0
@@ -104,7 +98,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fmadd h0, h1, h2, h0
+; CHECK-NEXT:fmla h0, h1, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %extract = extractelement <4 x half> %c, i32 0
@@ -117,7 +111,7 @@
 ; CHECK:   .Lt_vfmah_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:fmadd h0, h1, h2, h0
+; CHECK-NEXT:fmla h0, h1, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %extract = extractelement <8 x half> %c, i32 0
@@ -131,9 +125,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fneg v1.4h, v1.4h
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <4 x half> , %b
@@ -148,9 +140,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fneg v1.8h, v1.8h
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <8 x half> , %b
@@ -164,8 +154,7 @@
 ; CHECK:   .Lt_vfms_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmls v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <4 x half> , %b
@@ -179,8 +168,7 @@
 ; CHECK:   .Lt_vfmsq_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmls v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <8 x half> , %b
@@ -195,9 +183,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $h2 killed $h2 

[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-20 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 258865.
ilinpv added a comment.

Patterns corrected, vector_extract tests added.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78252/new/

https://reviews.llvm.org/D78252

Files:
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll

Index: llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
===
--- llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -14,8 +14,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
@@ -29,8 +28,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
@@ -43,8 +41,7 @@
 ; CHECK:   .Lt_vfma_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v1.4h, v2.4h
+; CHECK-NEXT:fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
@@ -57,8 +54,7 @@
 ; CHECK:   .Lt_vfmaq_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v1.8h, v2.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
@@ -72,8 +68,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $h2 killed $h2 def $q2
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %vecinit = insertelement <4 x half> undef, half %c, i32 0
@@ -88,8 +83,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $h2 killed $h2 def $q2
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %vecinit = insertelement <8 x half> undef, half %c, i32 0
@@ -104,7 +98,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fmadd h0, h1, h2, h0
+; CHECK-NEXT:fmla h0, h1, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %extract = extractelement <4 x half> %c, i32 0
@@ -117,7 +111,7 @@
 ; CHECK:   .Lt_vfmah_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:fmadd h0, h1, h2, h0
+; CHECK-NEXT:fmla h0, h1, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %extract = extractelement <8 x half> %c, i32 0
@@ -131,9 +125,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fneg v1.4h, v1.4h
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <4 x half> , %b
@@ -148,9 +140,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fneg v1.8h, v1.8h
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <8 x half> , %b
@@ -164,8 +154,7 @@
 ; CHECK:   .Lt_vfms_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmls v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <4 x half> , %b
@@ -179,8 +168,7 @@
 ; CHECK:   .Lt_vfmsq_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmls v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <8 x half> , %b
@@ -195,9 +183,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $h2 killed $h2 def $q2
-; CHECK-NEXT:fneg 

[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-18 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv marked an inline comment as not done.
ilinpv added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:8055
 multiclass SIMDFPIndexedTiedPatterns {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  // 1 variant for the .8h version: DUPLANE from 128-bit

dmgreen wrote:
> ilinpv wrote:
> > dmgreen wrote:
> > > Should we have equal patterns to those below for f32 as well? So using 
> > > DUP, D vector (4xf16) and possibly from a vector_extract too.
> > I'm worried about performance impact of change fmadd/sub -> fmla/ls in last 
> > pattern case.
> What performance impact are you worried about?
I mean, can fmla/ls take more cycles than fmadd/sub? Is there any performance
improvement from such a replacement?



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:8077
+
+  def : Pat<(f16 (OpNode (f16 FPR16:$Rd), (f16 FPR16:$Rn),
+ (vector_extract (v8f16 V128:$Rm), 
VectorIndexS:$idx))),

dmgreen wrote:
> Do you mean the v4f16 variant of this pattern?
This pattern replaces fmadd/sub with fmla/ls exactly, so it is questionable
whether or not this pattern is useful.
The v4f16 vector_extract variant has no test cases at all.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78252/new/

https://reviews.llvm.org/D78252



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-17 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64InstrFormats.td:8055
 multiclass SIMDFPIndexedTiedPatterns {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  // 1 variant for the .8h version: DUPLANE from 128-bit

dmgreen wrote:
> Should we have equal patterns to those below for f32 as well? So using DUP, D 
> vector (4xf16) and possibly from a vector_extract too.
I'm worried about the performance impact of changing fmadd/sub -> fmla/ls in
the last pattern case.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78252/new/

https://reviews.llvm.org/D78252



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-17 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv updated this revision to Diff 258337.
ilinpv marked an inline comment as done.
ilinpv edited the summary of this revision.
ilinpv added a comment.

More patterns added.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78252/new/

https://reviews.llvm.org/D78252

Files:
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll

Index: llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
===
--- llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -14,8 +14,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
@@ -29,8 +28,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
@@ -43,8 +41,7 @@
 ; CHECK:   .Lt_vfma_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v1.4h, v2.4h
+; CHECK-NEXT:fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
@@ -57,8 +54,7 @@
 ; CHECK:   .Lt_vfmaq_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v1.8h, v2.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
@@ -72,8 +68,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $h2 killed $h2 def $q2
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %vecinit = insertelement <4 x half> undef, half %c, i32 0
@@ -88,8 +83,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $h2 killed $h2 def $q2
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %vecinit = insertelement <8 x half> undef, half %c, i32 0
@@ -104,7 +98,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fmadd h0, h1, h2, h0
+; CHECK-NEXT:fmla h0, h1, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %extract = extractelement <4 x half> %c, i32 0
@@ -117,7 +111,7 @@
 ; CHECK:   .Lt_vfmah_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:fmadd h0, h1, h2, h0
+; CHECK-NEXT:fmla h0, h1, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %extract = extractelement <8 x half> %c, i32 0
@@ -131,9 +125,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fneg v1.4h, v1.4h
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmla v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <4 x half> , %b
@@ -148,9 +140,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fneg v1.8h, v1.8h
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <8 x half> , %b
@@ -164,8 +154,7 @@
 ; CHECK:   .Lt_vfms_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.4h, v2.h[0]
-; CHECK-NEXT:fmls v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <4 x half> , %b
@@ -179,8 +168,7 @@
 ; CHECK:   .Lt_vfmsq_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmls v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <8 x half> , %b
@@ -195,9 +183,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// 

[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.

2020-04-15 Thread Pavel Iliin via Phabricator via cfe-commits
ilinpv created this revision.
ilinpv added reviewers: samparker, dmgreen, SjoerdMeijer.
Herald added subscribers: cfe-commits, danielkiss, hiraditya, kristof.beyls.
Herald added a project: clang.

FMLA/FMLS 8H duplane indexed patterns added.
Fixes https://bugs.llvm.org/show_bug.cgi?id=45467


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D78252

Files:
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll


Index: llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
===
--- llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -29,8 +29,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> 
zeroinitializer
@@ -57,8 +56,7 @@
 ; CHECK:   .Lt_vfmaq_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v1.8h, v2.8h
+; CHECK-NEXT:fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> 
zeroinitializer
@@ -148,9 +146,7 @@
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:fneg v1.8h, v1.8h
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <8 x half> , %b
@@ -179,8 +175,7 @@
 ; CHECK:   .Lt_vfmsq_laneq_f16$local:
 ; CHECK-NEXT:.cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:dup v2.8h, v2.h[0]
-; CHECK-NEXT:fmls v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:ret
 entry:
   %sub = fsub <8 x half> , %b
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8052,6 +8052,15 @@
 }
 
 multiclass SIMDFPIndexedTiedPatterns {
+  let Predicates = [HasNEON, HasFullFP16] in {
+  // 1 variant for the .8h version: DUPLANE from 128-bit
+  def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
+   (v8f16 (AArch64duplane16 (v8f16 V128:$Rm),
+VectorIndexS:$idx,
+(!cast(INST # "v8i16_indexed")
+V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+  } // Predicates = [HasNEON, HasFullFP16]
+
   // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.
   def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
(AArch64duplane32 (v4f32 V128:$Rm),
Index: clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
===
--- clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
+++ clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
@@ -105,7 +105,7 @@
 // COMMONIR:  [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> 
[[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> 
@llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> 
[[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata 
!"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
+// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, 
v{{[0-9]+}}.h[{{[0-9]+}}]
 // COMMONIR:  ret <8 x half> [[FMLA]]
 float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
   return vfmaq_lane_f16(a, b, c, 3);
@@ -213,7 +213,6 @@
 
 // COMMON-LABEL: test_vfmsq_lane_f16
 // COMMONIR:  [[SUB:%.*]]  = fneg <8 x half> %b
-// CHECK-ASM: fneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
 // COMMONIR:  [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
 // COMMONIR:  [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
 // COMMONIR:  [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
@@ -223,7 +222,7 @@
 // COMMONIR:  [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
 // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> 
[[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
 // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> 
@llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> 
[[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata