[llvm-branch-commits] [llvm] e8cdcae - [X86] Accept 64-bit GPRs for vextractps when using a register that requires EVEX.

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Craig Topper
Date: 2021-02-03T19:41:57-08:00
New Revision: e8cdcaeae406527c9a76b3dc5c522391c81dfdfd

URL: 
https://github.com/llvm/llvm-project/commit/e8cdcaeae406527c9a76b3dc5c522391c81dfdfd
DIFF: 
https://github.com/llvm/llvm-project/commit/e8cdcaeae406527c9a76b3dc5c522391c81dfdfd.diff

LOG: [X86] Accept 64-bit GPRs for vextractps when using a register that 
requires EVEX.

This is consistent with the VEX version. It also fixes a sorting
issue in the matching table that caused the EVEX version to be
prioritized over VEX in intel syntax.

Fixes issue [2] from PR48991.

(cherry picked from commit c691fe14da93a7c9eff466231515d6d4d16124fa)

Added: 


Modified: 
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/test/MC/X86/intel-syntax-x86-64-avx.s
llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s

Removed: 




diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td 
b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0c2b278fdd7b..19012797ae9a 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1123,10 +1123,10 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", 
v64i8_info, v32i8x_info,
   EXTRACT_get_vextract256_imm, [HasAVX512]>;
 
 // vextractps - extract 32 bits from XMM
-def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
+def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
   (ins VR128X:$src1, u8imm:$src2),
   "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-  [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), 
imm:$src2))]>,
+  [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), 
imm:$src2))]>,
   EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
 
 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),

diff  --git a/llvm/test/MC/X86/intel-syntax-x86-64-avx.s 
b/llvm/test/MC/X86/intel-syntax-x86-64-avx.s
index bb57cb287f38..c1f20d204a8c 100644
--- a/llvm/test/MC/X86/intel-syntax-x86-64-avx.s
+++ b/llvm/test/MC/X86/intel-syntax-x86-64-avx.s
@@ -167,3 +167,7 @@
 // CHECK: vpmaddwd ymm1, ymm2, ymmword ptr [rcx + 8*r14 - 536870910]
 // CHECK: encoding: [0xc4,0xa1,0x6d,0xf5,0x8c,0xf1,0x02,0x00,0x00,0xe0]
   vpmaddwd ymm1, ymm2, ymmword ptr [rcx + 8*r14 - 536870910]
+
+// CHECK: vextractps ecx, xmm2, 1
+// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd1,0x01]
+  vextractps ecx, xmm2, 1

diff  --git a/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s 
b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
index 29bde03c5860..31c43afe5017 100644
--- a/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
+++ b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
@@ -1260,3 +1260,6 @@
 // CHECK: encoding: [0x62,0xf1,0x7e,0x89,0xe6,0x11]
   vcvtdq2pd xmm2 {k1} {z}, qword ptr [rcx]
 
+// CHECK: vextractps ecx, xmm17, 1
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x17,0xc9,0x01]
+  vextractps rcx, xmm17, 1



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] ad20866 - [OpenMP][NVPTX] Take functions in `deviceRTLs` as `convergent`

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Shilei Tian
Date: 2021-02-03T19:35:04-08:00
New Revision: ad2086658df181369a09ad69dac260a41dbab814

URL: 
https://github.com/llvm/llvm-project/commit/ad2086658df181369a09ad69dac260a41dbab814
DIFF: 
https://github.com/llvm/llvm-project/commit/ad2086658df181369a09ad69dac260a41dbab814.diff

LOG: [OpenMP][NVPTX] Take functions in `deviceRTLs` as `convergent`

OpenMP device compiler (similar to other SPMD compilers) assumes that
functions are convergent by default to avoid invalid transformations, such as
the bug (https://bugs.llvm.org/show_bug.cgi?id=49021).

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95971

(cherry picked from commit 0f0ce3c12edefd25448e39c4d20718a10d3d42c1)

Added: 
clang/test/OpenMP/target_attribute_convergent.cpp
openmp/libomptarget/test/offloading/bug49021.cpp

Modified: 
clang/lib/Frontend/CompilerInvocation.cpp

Removed: 




diff  --git a/clang/lib/Frontend/CompilerInvocation.cpp 
b/clang/lib/Frontend/CompilerInvocation.cpp
index d8be4ea14868..036388ebd355 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2470,6 +2470,8 @@ void CompilerInvocation::ParseLangArgs(LangOptions , 
ArgList ,
   bool IsTargetSpecified =
   Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
 
+  Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice;
+
   if (Opts.OpenMP || Opts.OpenMPSimd) {
 if (int Version = getLastArgIntValue(
 Args, OPT_fopenmp_version_EQ,

diff  --git a/clang/test/OpenMP/target_attribute_convergent.cpp 
b/clang/test/OpenMP/target_attribute_convergent.cpp
new file mode 100644
index ..932214e987c8
--- /dev/null
+++ b/clang/test/OpenMP/target_attribute_convergent.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple 
nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s 
-fopenmp-is-device -o - | FileCheck %s
+// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple 
nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s 
-fopenmp-is-device -o - | FileCheck %s
+// expected-no-diagnostics
+
+#pragma omp declare target
+
+void foo() {}
+
+#pragma omp end declare target
+
+// CHECK: Function Attrs: {{.*}}convergent{{.*}}
+// CHECK: define hidden void @_Z3foov() [[ATTRIBUTE_NUMBER:#[0-9]+]]
+// CHECK: attributes [[ATTRIBUTE_NUMBER]] = { {{.*}}convergent{{.*}} }

diff  --git a/openmp/libomptarget/test/offloading/bug49021.cpp 
b/openmp/libomptarget/test/offloading/bug49021.cpp
new file mode 100644
index ..bcdbf68b10e0
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/bug49021.cpp
@@ -0,0 +1,85 @@
+// RUN: %libomptarget-compilexx-aarch64-unknown-linux-gnu -O3 && 
%libomptarget-run-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compilexx-powerpc64-ibm-linux-gnu -O3 && 
%libomptarget-run-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-powerpc64le-ibm-linux-gnu -O3 && 
%libomptarget-run-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-x86_64-pc-linux-gnu -O3 && 
%libomptarget-run-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda -O3 && 
%libomptarget-run-nvptx64-nvidia-cuda
+
+#include 
+
+template  int test_map() {
+  std::cout << "map(complex<>)" << std::endl;
+  T a(0.2), a_check;
+#pragma omp target map(from : a_check)
+  { a_check = a; }
+
+  if (a_check != a) {
+std::cout << " wrong results";
+return 1;
+  }
+
+  return 0;
+}
+
+template  int test_reduction() {
+  std::cout << "flat parallelism" << std::endl;
+  T sum(0), sum_host(0);
+  const int size = 100;
+  T array[size];
+  for (int i = 0; i < size; i++) {
+array[i] = i;
+sum_host += array[i];
+  }
+
+#pragma omp target teams distribute parallel for map(to: array[:size]) 
\
+ reduction(+ : sum)
+  for (int i = 0; i < size; i++)
+sum += array[i];
+
+  if (sum != sum_host)
+std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
+
+  std::cout << "hierarchical parallelism" << std::endl;
+  const int nblock(10), block_size(10);
+  T block_sum[nblock];
+#pragma omp target teams distribute map(to 
\
+: array[:size])
\
+map(from   
\
+: block_sum[:nblock])
+  for (int ib = 0; ib < nblock; ib++) {
+T partial_sum = 0;
+const int istart = ib * block_size;
+const int iend = (ib + 1) * block_size;
+#pragma omp parallel for reduction(+ : partial_sum)
+for (int i = istart; i < iend; i++)
+  partial_sum += array[i];
+block_sum[ib] = partial_sum;
+  }
+
+  sum = 0;
+  for (int ib = 0; ib < nblock; ib++) {
+sum += block_sum[ib];
+  }
+
+  

[llvm-branch-commits] [clang] a9157c5 - [CSSPGO] Introducing distribution factor for pseudo probe.

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Hongtao Yu
Date: 2021-02-03T19:28:31-08:00
New Revision: a9157c5628dc89b13936bbc8eef261cb02d63d40

URL: 
https://github.com/llvm/llvm-project/commit/a9157c5628dc89b13936bbc8eef261cb02d63d40
DIFF: 
https://github.com/llvm/llvm-project/commit/a9157c5628dc89b13936bbc8eef261cb02d63d40.diff

LOG: [CSSPGO] Introducing distribution factor for pseudo probe.

Sample re-annotation is required in LTO time to achieve a reasonable 
post-inline profile quality. However, we have seen that such LTO-time 
re-annotation degrades profile quality. This is mainly caused by preLTO code 
duplication that is done by passes such as loop unrolling, jump threading, 
indirect call promotion etc, where samples corresponding to a source location 
are aggregated multiple times due to the duplicates. In this change we are 
introducing a concept of distribution factor for pseudo probes so that samples 
can be distributed for duplicated probes scaled by a factor. We hope that 
optimizations duplicating code well-maintain the branch frequency information 
(BFI) based on which probe distribution factors are calculated. Distribution 
factors are updated at the end of preLTO pipeline to reflect an estimated 
portion of the real execution count.

This change also introduces a pseudo probe verifier that can be run after each 
IR pass to detect duplicated pseudo probes.

A saturated distribution factor stands for 1.0. A pseudo probe will carry a 
factor with a value ranging from 0.0 to 1.0. A 64-bit integral distribution 
factor field that represents [0.0, 1.0] is associated to each block probe. 
Unfortunately this cannot be done for callsite probes due to the size 
limitation of a 32-bit Dwarf discriminator. A 7-bit distribution factor is used 
instead.

Changes are also needed to the sample profile inliner to deal with prorated 
callsite counts. Call sites duplicated by PreLTO passes, when later on inlined 
in LTO time, should have the callees' probes prorated based on the 
Prelink-computed distribution factors. The distribution factors should also be 
taken into account when computing hotness for inline candidates. Also, indirect 
call promotion results in multiple callsites. The original samples should be 
distributed across them. This is fixed by adjusting the callsites' 
distribution factors.

Reviewed By: wmi

Differential Revision: https://reviews.llvm.org/D93264

(cherry picked from commit 3d89b3cbec230633e8228787819b15116c1a1730)

Added: 
llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll

Modified: 
clang/test/CodeGen/pseudo-probe-emit.c
llvm/include/llvm/IR/IntrinsicInst.h
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/IR/PseudoProbe.h
llvm/include/llvm/Passes/StandardInstrumentations.h
llvm/include/llvm/ProfileData/SampleProf.h
llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
llvm/lib/IR/PseudoProbe.cpp
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Passes/PassRegistry.def
llvm/lib/Passes/StandardInstrumentations.cpp
llvm/lib/Transforms/IPO/SampleProfile.cpp
llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll

Removed: 




diff  --git a/clang/test/CodeGen/pseudo-probe-emit.c 
b/clang/test/CodeGen/pseudo-probe-emit.c
index 059673b6992e..fccc8f04844d 100644
--- a/clang/test/CodeGen/pseudo-probe-emit.c
+++ b/clang/test/CodeGen/pseudo-probe-emit.c
@@ -6,12 +6,12 @@ void bar();
 void go();
 
 void foo(int x) {
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
   if (x == 0)
-// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0)
+// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
 bar();
   else
-// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0)
+// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
 go();
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0)
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
 }

diff  --git a/llvm/include/llvm/IR/IntrinsicInst.h 
b/llvm/include/llvm/IR/IntrinsicInst.h
index 9d68f3fdde6c..df3a1d568756 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -981,12 +981,16 @@ class PseudoProbeInst : public IntrinsicInst {
 return cast(const_cast(getArgOperand(0)));
   }
 
+  ConstantInt *getIndex() const {
+return cast(const_cast(getArgOperand(1)));
+  }
+
   ConstantInt 

[llvm-branch-commits] [llvm] c2f3f45 - [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Wenlei He
Date: 2021-02-03T19:28:30-08:00
New Revision: c2f3f45b5c5bd6f9b86a766fc40130b34acb8293

URL: 
https://github.com/llvm/llvm-project/commit/c2f3f45b5c5bd6f9b86a766fc40130b34acb8293
DIFF: 
https://github.com/llvm/llvm-project/commit/c2f3f45b5c5bd6f9b86a766fc40130b34acb8293.diff

LOG: [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline

Refactoring SampleProfileLoader::inlineHotFunctions to use helpers from CSSPGO 
inlining and reduce similar code in the inlining loop, plus minor cleanup for 
AFDO path.

This is a resubmit of D95024, with the build break and the overly tight assertion fixed.

Test Plan:

(cherry picked from commit 1645f465be85223e9f5b6303a3e5e0e491fd819f)

Added: 


Modified: 
llvm/lib/Transforms/IPO/SampleProfile.cpp
llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
llvm/test/Transforms/SampleProfile/remarks.ll

Removed: 




diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp 
b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 665c4078f3ee..2cfefd3a18ea 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -416,20 +416,18 @@ class SampleProfileLoader {
   findIndirectCallFunctionSamples(const Instruction , uint64_t ) const;
   mutable DenseMap 
DILocation2SampleMap;
   const FunctionSamples *findFunctionSamples(const Instruction ) const;
-  CallBase *tryPromoteIndirectCall(Function , StringRef CalleeName,
-   uint64_t , uint64_t Count, CallBase *I,
-   const char *);
-  bool inlineCallInstruction(CallBase ,
- const FunctionSamples *CalleeSamples);
+  // Attempt to promote indirect call and also inline the promoted call
+  bool tryPromoteAndInlineCandidate(
+  Function , InlineCandidate , uint64_t ,
+  DenseSet ,
+  SmallVector *InlinedCallSites = nullptr);
   bool inlineHotFunctions(Function ,
   DenseSet );
-  // Helper functions call-site prioritized BFS inliner
-  // Will change the main FDO inliner to be work list based directly in
-  // upstream, then merge this change with that and remove the duplication.
   InlineCost shouldInlineCandidate(InlineCandidate );
   bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
-  bool tryInlineCandidate(InlineCandidate ,
-  SmallVector );
+  bool
+  tryInlineCandidate(InlineCandidate ,
+ SmallVector *InlinedCallSites = nullptr);
   bool
   inlineHotFunctionsWithPriority(Function ,
  DenseSet );
@@ -1077,70 +1075,46 @@ SampleProfileLoader::findFunctionSamples(const 
Instruction ) const {
   return it.first->second;
 }
 
-CallBase *
-SampleProfileLoader::tryPromoteIndirectCall(Function , StringRef CalleeName,
-uint64_t , uint64_t Count,
-CallBase *I, const char *) {
-  Reason = "Callee function not available";
+/// Attempt to promote indirect call and also inline the promoted call.
+///
+/// \param F  Caller function.
+/// \param Candidate  ICP and inline candidate.
+/// \param Sum  Sum of target counts for indirect call.
+/// \param PromotedInsns  Map to keep track of indirect call already processed.
+/// \param Candidate  ICP and inline candidate.
+/// \param InlinedCallSite  Output vector for new call sites exposed after
+/// inlining.
+bool SampleProfileLoader::tryPromoteAndInlineCandidate(
+Function , InlineCandidate , uint64_t ,
+DenseSet ,
+SmallVector *InlinedCallSite) {
+  const char *Reason = "Callee function not available";
   // R->getValue() !=  is to prevent promoting a recursive call.
   // If it is a recursive call, we do not inline it as it could bloat
   // the code exponentially. There is way to better handle this, e.g.
   // clone the caller first, and inline the cloned caller if it is
   // recursive. As llvm does not inline recursive calls, we will
   // simply ignore it instead of handling it explicitly.
-  auto R = SymbolMap.find(CalleeName);
+  auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
   if (R != SymbolMap.end() && R->getValue() &&
   !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
   R->getValue()->hasFnAttribute("use-sample-profile") &&
-  R->getValue() !=  && isLegalToPromote(*I, R->getValue(), )) {
+  R->getValue() !=  &&
+  isLegalToPromote(*Candidate.CallInstr, R->getValue(), )) {
 auto *DI =
-::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE);
-Sum -= Count;
-return DI;
-  }
-  return nullptr;
-}
-
-bool SampleProfileLoader::inlineCallInstruction(
-CallBase , const FunctionSamples *CalleeSamples) {
-  if (ExternalInlineAdvisor) {
-auto Advice = ExternalInlineAdvisor->getAdvice(CB);
-if 

[llvm-branch-commits] [llvm] 27ff658 - [CSSPGO] Call site prioritized inlining for sample PGO

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Wenlei He
Date: 2021-02-03T19:28:30-08:00
New Revision: 27ff658e97528540e4425c0cb6400f3e5355f53a

URL: 
https://github.com/llvm/llvm-project/commit/27ff658e97528540e4425c0cb6400f3e5355f53a
DIFF: 
https://github.com/llvm/llvm-project/commit/27ff658e97528540e4425c0cb6400f3e5355f53a.diff

LOG: [CSSPGO] Call site prioritized inlining for sample PGO

This change implemented call site prioritized BFS profile guided inlining for 
sample profile loader. The new inlining strategy maximizes the benefit of 
context-sensitive profile as mentioned in the follow up discussion of CSSPGO 
RFC. The change will not affect today's AutoFDO as it's opt-in. CSSPGO now 
defaults to the new FDO inliner, but can fall back to today's replay inliner 
using a switch (`-sample-profile-prioritized-inline=0`).

Motivation

With baseline AutoFDO, the inliner in sample profile loader only replays 
previous inlining, and the use of profile is only for pruning previous inlining 
that turned out to be cold. Due to the nature of replay, the FDO inliner is 
simple with hotness being the only decision factor. It has the following 
limitations that we're improving now for CSSPGO.
 - It doesn't take inline candidate size into account. Since it's doing replay, 
the size growth is bounded by previous CGSCC inlining. With context-sensitive 
profile, FDO inliner is no longer limited by previous inlining, so we need to 
take size into account to avoid significant size bloat.
 - The way it looks at hotness is not accurate. It uses total samples in an 
inlinee as proxy for hotness, while what really matters for an inline decision 
is the call site count. This is an unfortunate fall back because call site 
count and callee entry count are not reliable due to dwarf based correlation, 
especially for inlinees. Now paired with pseudo-probe, we have accurate call 
site count and callee's entry count, so we can use that to gauge hotness more 
accurately.
 - It treats all call sites from a block as hot as long as there's one call 
site considered hot. This is normally true, but since total samples is used as 
hotness proxy, this transitiveness within block magnifies the inaccurate hotness 
heuristic. With pseudo-probe and the change above, this is no longer an issue 
for CSSPGO.

New FDO Inliner

Putting all the requirements for CSSPGO together, we need a top-down call site 
prioritized BFS inliner. Here are the reasons why each component is needed.
 - Top-down: We need a top-down inliner to better leverage context-sensitive 
profile, so inlining is driven by accurate context profile, and post-inline is 
also accurate. This is already implemented in https://reviews.llvm.org/D70655.
 - Size Cap: For top-down inliner, taking function size into account for inline 
decision alone isn't sufficient to control size growth. We also need to 
explicitly cap size growth because with top-down inlining, we can grow inliner 
size significantly with large number of smaller inlinees even if each 
individually passes the cost/size check.
 - Prioritize call sites: With size cap, inlining order also becomes important, 
because if we stop inlining due to size budget limit, we'd want to use budget 
towards the most beneficial call sites.
 - BFS inline: Same as call site prioritization, if we stop inlining due to 
size budget limit, we want a balanced inline tree, rather than going deep on 
one call path.

Note that the new inliner avoids repeatedly evaluating the same set of call sites, 
so it should help with compile time too. For this reason, we could transition 
today's FDO inliner to use a queue with equal priority to avoid wasted 
reevaluation of the same call site (TODO).

Speculative indirect call promotion and inlining is also supported now with 
CSSPGO just like baseline AutoFDO.

Tunings and knobs

I created tuning knobs for size growth/cap control, and for hot threshold 
separate from CGSCC inliner. The default values are selected based on initial 
tuning with CSSPGO.

Results

Evaluated with an internal LLVM fork a couple of months ago, plus another change to 
adjust hot-threshold cutoff for context profile (will send up after this one), 
the new inliner shows ~1% geomean perf win on spec2006 with CSSPGO, while 
reducing code size too. The measurement was done using train-train setup, 
MonoLTO w/ new pass manager and pseudo-probe. Note that this is just a starting 
point - we hope that the new inliner will open up more opportunity with CSSPGO, 
but it will certainly take more time and effort to make it fully calibrated and 
ready for bigger workloads (we're working on it).

Differential Revision: https://reviews.llvm.org/D94001

(cherry picked from commit 6bae5973c476e16dbbc82030d65c7859a6628e89)

Added: 
llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof
llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll
llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll
llvm/test/Transforms/SampleProfile/csspgo-inline.ll

Modified: 

[llvm-branch-commits] [clang] b9fa16f - [CSSPGO] Passing the clang driver switch -fpseudo-probe-for-profiling to the linker.

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Hongtao Yu
Date: 2021-02-03T19:28:30-08:00
New Revision: b9fa16f2234edddf6e0f449a0e7b646ee9046cf3

URL: 
https://github.com/llvm/llvm-project/commit/b9fa16f2234edddf6e0f449a0e7b646ee9046cf3
DIFF: 
https://github.com/llvm/llvm-project/commit/b9fa16f2234edddf6e0f449a0e7b646ee9046cf3.diff

LOG: [CSSPGO] Passing the clang driver switch -fpseudo-probe-for-profiling to 
the linker.

As titled.

Reviewed By: wmi, wenlei

Differential Revision: https://reviews.llvm.org/D95271

(cherry picked from commit d3e2e3740d0730cb6788c771bb01a8f3e935bf2e)

Added: 
clang/test/Driver/pseudo-probe-lto.c

Modified: 
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/CommonArgs.cpp

Removed: 




diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 42c5319041d0..1f6c13d5cc96 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1147,7 +1147,7 @@ def fprofile_update_EQ : Joined<["-"], 
"fprofile-update=">,
 defm pseudo_probe_for_profiling : BoolFOption<"pseudo-probe-for-profiling",
   CodeGenOpts<"PseudoProbeForProfiling">, DefaultFalse,
   PosFlag, NegFlag,
-  BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample profiler">>;
+  BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample 
profiling">>;
 def forder_file_instrumentation : Flag<["-"], "forder-file-instrumentation">,
 Group, Flags<[CC1Option, CoreOption]>,
 HelpText<"Generate instrumented code to collect order file into 
default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env 
var)">;

diff  --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp 
b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 6a95aa5ec628..bcaea71dca94 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -605,6 +605,11 @@ void tools::addLTOOptions(const ToolChain , 
const ArgList ,
   CmdArgs.push_back("-plugin-opt=new-pass-manager");
   }
 
+  // Pass an option to enable pseudo probe emission.
+  if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
+   options::OPT_fno_pseudo_probe_for_profiling, false))
+CmdArgs.push_back("-plugin-opt=pseudo-probe-for-profiling");
+
   // Setup statistics file output.
   SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
   if (!StatsFile.empty())

diff  --git a/clang/test/Driver/pseudo-probe-lto.c 
b/clang/test/Driver/pseudo-probe-lto.c
new file mode 100644
index ..e319b8c0098b
--- /dev/null
+++ b/clang/test/Driver/pseudo-probe-lto.c
@@ -0,0 +1,10 @@
+// RUN: touch %t.o
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto 
-fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto=thin 
-fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto 
-fno-pseudo-probe-for-profiling -fpseudo-probe-for-profiling 2>&1 | FileCheck 
%s --check-prefix=PROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto 2>&1 | FileCheck 
%s --check-prefix=NOPROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto 
-fno-pseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=NOPROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto 
-fpseudo-probe-for-profiling -fno-pseudo-probe-for-profiling 2>&1 | FileCheck 
%s --check-prefix=NOPROBE
+
+// PROBE: -plugin-opt=pseudo-probe-for-profiling
+// NOPROBE-NOT: -plugin-opt=pseudo-probe-for-profiling



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] f2cabaa - [CSSPGO] Tweaking inlining with pseudo probes.

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Hongtao Yu
Date: 2021-02-03T19:28:29-08:00
New Revision: f2cabaac9525ba4b86301136e21ec9aad6aaf326

URL: 
https://github.com/llvm/llvm-project/commit/f2cabaac9525ba4b86301136e21ec9aad6aaf326
DIFF: 
https://github.com/llvm/llvm-project/commit/f2cabaac9525ba4b86301136e21ec9aad6aaf326.diff

LOG: [CSSPGO] Tweaking inlining with pseudo probes.

Fixing up a couple places where `getCallSiteIdentifier` is needed to support 
pseudo-probe-based callsites.

Also fixing an issue in the extbinary profile reader where the metadata section 
is not fully scanned based on the number of profiles loaded only for the 
current module.

Reviewed By: wmi, wenlei

Differential Revision: https://reviews.llvm.org/D95791

(cherry picked from commit 224fee8219bb3aed34f13ce40935e1b3ede90a0f)

Added: 
llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof
llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll

Modified: 
llvm/lib/ProfileData/SampleProfReader.cpp
llvm/lib/Transforms/IPO/SampleContextTracker.cpp

Removed: 




diff  --git a/llvm/lib/ProfileData/SampleProfReader.cpp 
b/llvm/lib/ProfileData/SampleProfReader.cpp
index c9f41687c356..370ffc8e2885 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -883,7 +883,7 @@ std::error_code 
SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
   if (!ProfileIsProbeBased)
 return sampleprof_error::success;
-  for (unsigned I = 0; I < Profiles.size(); ++I) {
+  while (Data < End) {
 auto FName(readStringFromTable());
 if (std::error_code EC = FName.getError())
   return EC;
@@ -893,8 +893,13 @@ std::error_code 
SampleProfileReaderExtBinaryBase::readFuncMetadata() {
   return EC;
 
 SampleContext FContext(*FName);
-Profiles[FContext].setFunctionHash(*Checksum);
+// No need to load metadata for profiles that are not loaded in the current
+// module.
+if (Profiles.count(FContext))
+  Profiles[FContext].setFunctionHash(*Checksum);
   }
+
+  assert(Data == End && "More data is read than expected");
   return sampleprof_error::success;
 }
 

diff  --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp 
b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 660d79de667c..fad72985dedd 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -308,8 +308,7 @@ void SampleContextTracker::promoteMergeContextSamplesTree(
 return;
 
   // Get the context that needs to be promoted
-  LineLocation CallSite(FunctionSamples::getOffset(DIL),
-DIL->getBaseDiscriminator());
+  LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
   ContextTrieNode *NodeToPromo =
   CallerNode->getChildContext(CallSite, CalleeName);
   if (!NodeToPromo)
@@ -370,9 +369,7 @@ SampleContextTracker::getCalleeContextFor(const DILocation 
*DIL,
 return nullptr;
 
   return CallContext->getChildContext(
-  LineLocation(FunctionSamples::getOffset(DIL),
-   DIL->getBaseDiscriminator()),
-  CalleeName);
+  FunctionSamples::getCallSiteIdentifier(DIL), CalleeName);
 }
 
 ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
@@ -386,8 +383,8 @@ ContextTrieNode *SampleContextTracker::getContextFor(const 
DILocation *DIL) {
 if (Name.empty())
   Name = PrevDIL->getScope()->getSubprogram()->getName();
 S.push_back(
-std::make_pair(LineLocation(FunctionSamples::getOffset(DIL),
-DIL->getBaseDiscriminator()), Name));
+std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL),
+   
PrevDIL->getScope()->getSubprogram()->getLinkageName()));
 PrevDIL = DIL;
   }
 

diff  --git 
a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof 
b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof
new file mode 100644
index ..fd3ff773e85d
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof
@@ -0,0 +1,18 @@
+[foo]:23:23
+ 1: 23
+ 2: 23 zen:23
+ !CFGChecksum: 281479271677951
+[foo:2 @ zen]:765858:23
+ 1: 23
+ 2: 382920
+ 3: 382915
+ !CFGChecksum: 138828622701
+[bar]:23:23
+ 1: 23
+ 2: 23 zen:23
+ !CFGChecksum: 281479271677951
+[bar:2 @ zen]:765858:23
+ 1: 23
+ 2: 382920
+ 3: 382915
+ !CFGChecksum: 138828622701
\ No newline at end of file

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll 
b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
new file mode 100644
index ..a5033a0dc190
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -0,0 +1,175 @@
+; RUN: opt < %s -passes=pseudo-probe,sample-profile 
-sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S 

[llvm-branch-commits] [llvm] 7d096f9 - [CSSPGO] Support of CS profiles in extended binary format.

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Hongtao Yu
Date: 2021-02-03T19:28:29-08:00
New Revision: 7d096f9bb350429628c6befce8f94dba4bbc6db9

URL: 
https://github.com/llvm/llvm-project/commit/7d096f9bb350429628c6befce8f94dba4bbc6db9
DIFF: 
https://github.com/llvm/llvm-project/commit/7d096f9bb350429628c6befce8f94dba4bbc6db9.diff

LOG: [CSSPGO] Support of CS profiles in extended binary format.

This change brings up support of context-sensitive profiles in the format of 
extended binary. Existing sample profile reader/writer/merger code is being 
tweaked to reflect the fact of bracketed input contexts, like (`[...]`). The 
paired brackets are also needed in extbinary profiles because we don't yet have 
an otherwise good way to tell calling contexts apart from regular function 
names since the context delimiter `@` can somehow serve as a part of the C++ 
mangled names.

Reviewed By: wmi, wenlei

Differential Revision: https://reviews.llvm.org/D95547

(cherry picked from commit 7e99bddfeaab2713a8bb6ca538da25b66e6efc59)

Added: 
llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext
llvm/test/tools/llvm-profdata/cs-sample-profile.test

Modified: 
llvm/include/llvm/ProfileData/SampleProf.h
llvm/include/llvm/ProfileData/SampleProfReader.h
llvm/lib/ProfileData/SampleProfReader.cpp
llvm/lib/ProfileData/SampleProfWriter.cpp
llvm/lib/Transforms/IPO/SampleContextTracker.cpp
llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
llvm/tools/llvm-profdata/llvm-profdata.cpp
llvm/tools/llvm-profgen/ProfileGenerator.cpp

Removed: 




diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h 
b/llvm/include/llvm/ProfileData/SampleProf.h
index c45ace9e68c1..346bc4c81d86 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -439,9 +439,11 @@ class SampleContext {
   void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
   bool hasContext() const { return State != UnknownContext; }
   bool isBaseContext() const { return CallingContext.empty(); }
-  StringRef getName() const { return Name; }
+  StringRef getNameWithoutContext() const { return Name; }
   StringRef getCallingContext() const { return CallingContext; }
-  StringRef getNameWithContext() const { return FullContext; }
+  StringRef getNameWithContext(bool WithBracket = false) const {
+return WithBracket ? InputContext : FullContext;
+  }
 
 private:
   // Give a context string, decode and populate internal states like
@@ -449,6 +451,7 @@ class SampleContext {
   // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
   void setContext(StringRef ContextStr, ContextStateMask CState) {
 assert(!ContextStr.empty());
+InputContext = ContextStr;
 // Note that `[]` wrapped input indicates a full context string, otherwise
 // it's treated as context-less function name only.
 bool HasContext = ContextStr.startswith("[");
@@ -480,6 +483,9 @@ class SampleContext {
 }
   }
 
+  // Input context string including bracketed calling context and leaf function
+  // name
+  StringRef InputContext;
   // Full context string including calling context and leaf function name
   StringRef FullContext;
   // Function name for the associated sample profile
@@ -676,7 +682,8 @@ class FunctionSamples {
 Name = Other.getName();
 if (!GUIDToFuncNameMap)
   GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
-
+if (Context.getNameWithContext(true).empty())
+  Context = Other.getContext();
 if (FunctionHash == 0) {
   // Set the function hash code for the target profile.
   FunctionHash = Other.getFunctionHash();
@@ -743,8 +750,10 @@ class FunctionSamples {
   StringRef getName() const { return Name; }
 
   /// Return function name with context.
-  StringRef getNameWithContext() const {
-return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name;
+  StringRef getNameWithContext(bool WithBracket = false) const {
+return FunctionSamples::ProfileIsCS
+   ? Context.getNameWithContext(WithBracket)
+   : Name;
   }
 
   /// Return the original function name.

diff  --git a/llvm/include/llvm/ProfileData/SampleProfReader.h 
b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 3f52a2f6163b..999e75eddffa 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -488,8 +488,12 @@ class SampleProfileReader {
   /// \brief Whether samples are collected based on pseudo probes.
   bool ProfileIsProbeBased = false;
 
+  /// Whether function profiles are context-sensitive.
   bool ProfileIsCS = false;
 
+  /// Number of context-sensitive profiles.
+  uint32_t CSProfileCount = 0;
+
   /// \brief The format of sample.
   SampleProfileFormat Format = SPF_None;
 };

diff  --git a/llvm/lib/ProfileData/SampleProfReader.cpp 
b/llvm/lib/ProfileData/SampleProfReader.cpp
index c42931174bc0..c9f41687c356 

[llvm-branch-commits] [openmp] f5602e0 - [OpenMP] Disabled profiling in `libomp` by default to unblock link errors

2021-02-03 Thread Shilei Tian via llvm-branch-commits

Author: Shilei Tian
Date: 2021-02-03T19:18:08-05:00
New Revision: f5602e0bf31ab590da19fa357980a753dbfd666e

URL: 
https://github.com/llvm/llvm-project/commit/f5602e0bf31ab590da19fa357980a753dbfd666e
DIFF: 
https://github.com/llvm/llvm-project/commit/f5602e0bf31ab590da19fa357980a753dbfd666e.diff

LOG: [OpenMP] Disabled profiling in `libomp` by default to unblock link errors

Link error occurred when time profiling in libomp is enabled by default
because `libomp` is assumed to be a C library but the dependence on
`libLLVMSupport` for profiling is a C++ library. Currently the issue blocks all
OpenMP tests in Phabricator.

This patch sets a new CMake option `OPENMP_ENABLE_LIBOMP_PROFILING` to
enable/disable the feature. By default it is disabled. Note that once time
profiling is enabled for `libomp`, it becomes a C++ library.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95585

(cherry picked from commit c571b168349fdf22d1dc8b920bcffa3d5161f0a2)

Added: 


Modified: 
openmp/CMakeLists.txt
openmp/docs/design/Runtimes.rst
openmp/runtime/CMakeLists.txt
openmp/runtime/src/CMakeLists.txt
openmp/runtime/src/kmp_config.h.cmake
openmp/runtime/src/kmp_runtime.cpp

Removed: 




diff  --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt
index 67600bebdafb..4787d4b5a321 100644
--- a/openmp/CMakeLists.txt
+++ b/openmp/CMakeLists.txt
@@ -86,6 +86,12 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building 
libomptarget for offloading."
${ENABLE_LIBOMPTARGET})
 option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for 
libomptarget."
${ENABLE_LIBOMPTARGET})
+option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF)
+
+# Build host runtime library, after LIBOMPTARGET variables are set since they 
are needed
+# to enable time profiling support in the OpenMP runtime.
+add_subdirectory(runtime)
+
 if (OPENMP_ENABLE_LIBOMPTARGET)
   # Check that the library can actually be built.
   if (APPLE OR WIN32)

diff  --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index 016b88ba324b..ad36e43eccdc 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -48,7 +48,10 @@ similar to Clang's ``-ftime-trace`` option. This generates a 
JSON file based on
 `Speedscope App`_. Building this feature depends on the `LLVM Support Library`_
 for time trace output. Using this library is enabled by default when building
 using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output 
will
-be saved to the filename specified by the environment variable.
+be saved to the filename specified by the environment variable. For 
multi-threaded
+applications, profiling in ``libomp`` is also needed. Setting the CMake option
+``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this 
will
+turn ``libomp`` into a C++ library.
 
 .. _`Chrome Tracing`: 
https://www.chromium.org/developers/how-tos/trace-event-profiling-tool
 

diff  --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
index 9fdd04f41646..8828ff8ef455 100644
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -34,7 +34,6 @@ if(${OPENMP_STANDALONE_BUILD})
   # Should assertions be enabled?  They are on by default.
   set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL
 "enable assertions?")
-  set(LIBOMPTARGET_PROFILING_SUPPORT FALSE)
 else() # Part of LLVM build
   # Determine the native architecture from LLVM.
   string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH)
@@ -66,10 +65,11 @@ else() # Part of LLVM build
 libomp_get_architecture(LIBOMP_ARCH)
   endif ()
   set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS})
-  # Time profiling support
-  set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING})
 endif()
 
+# Time profiling support
+set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING})
+
 # FUJITSU A64FX is a special processor because its cache line size is 256.
 # We need to pass this information into kmp_config.h.
 if(LIBOMP_ARCH STREQUAL "aarch64")

diff  --git a/openmp/runtime/src/CMakeLists.txt 
b/openmp/runtime/src/CMakeLists.txt
index 2e927df84f5c..822f9ca2b825 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -50,6 +50,14 @@ if(${LIBOMP_USE_HWLOC})
   include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
 endif()
 
+# Building with time profiling support requires LLVM directory includes.
+if(LIBOMP_PROFILING_SUPPORT)
+  include_directories(
+${LLVM_MAIN_INCLUDE_DIR}
+${LLVM_INCLUDE_DIR}
+  )
+endif()
+
 # Getting correct source files to build library
 set(LIBOMP_CXXFILES)
 set(LIBOMP_ASMFILES)
@@ -135,7 +143,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS)
 
 libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS)
 # Build libomp library. Add LLVMSupport dependency if building in-tree with 

[llvm-branch-commits] [clang] 2a917b7 - Extend release notes for AST Matchers changes

2021-02-03 Thread Stephen Kelly via llvm-branch-commits

Author: Stephen Kelly
Date: 2021-02-03T23:05:27Z
New Revision: 2a917b70e770e2d25d96f91beebf2a3e52bb9e66

URL: 
https://github.com/llvm/llvm-project/commit/2a917b70e770e2d25d96f91beebf2a3e52bb9e66
DIFF: 
https://github.com/llvm/llvm-project/commit/2a917b70e770e2d25d96f91beebf2a3e52bb9e66.diff

LOG: Extend release notes for AST Matchers changes

Added: 


Modified: 
clang/docs/ReleaseNotes.rst

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a34cd512ca59..9efd4c01f053 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -250,15 +250,41 @@ release of Clang. Users of the build system should adjust 
accordingly.
 AST Matchers
 
 
-- The behavior of TK_IgnoreUnlessSpelledInSource with the traverse() matcher
-  has been changed to no longer match on template instantiations or on
+- The ``mapAnyOf()`` matcher was added. This allows convenient matching of
+  different AST nodes which have a compatible matcher API. For example,
+  ``mapAnyOf(ifStmt, forStmt).with(hasCondition(integerLiteral()))``
+  matches any ``IfStmt`` or ``ForStmt`` with a integer literal as the
+  condition.
+
+- The ``binaryOperation()`` matcher allows matching expressions which
+  appear like binary operators in the code, even if they are really
+  ``CXXOperatorCallExpr`` for example. It is based on the ``mapAnyOf()``
+  matcher functionality. The matcher API for the latter node has been
+  extended with ``hasLHS()`` etc to facilitate the abstraction.
+
+- Matcher API for ``CXXRewrittenBinaryOperator`` has been added. In addition
+  to explicit matching with the ``cxxRewrittenBinaryOperator()`` matcher, the
+  ``binaryOperation()`` matches on nodes of this type.
+
+- The behavior of ``TK_IgnoreUnlessSpelledInSource`` with the ``traverse()``
+  matcher has been changed to no longer match on template instantiations or on
   implicit nodes which are not spelled in the source.
 
-- The TK_IgnoreImplicitCastsAndParentheses traversal kind was removed. It
-  is recommended to use TK_IgnoreUnlessSpelledInSource instead.
+- The ``TK_IgnoreImplicitCastsAndParentheses`` traversal kind was removed. It
+  is recommended to use ``TK_IgnoreUnlessSpelledInSource`` instead.
 
-- The behavior of the forEach() matcher was changed to not internally ignore
-  implicit and parenthesis nodes.
+- The behavior of the ``forEach()`` matcher was changed to not internally
+  ignore implicit and parenthesis nodes.  This makes it consistent with
+  the ``has()`` matcher.  Uses of ``forEach()`` relying on the old behavior
+  can now use the  ``traverse()`` matcher or ``ignoringParenCasts()``.
+
+- Several AST Matchers have been changed to match based on the active
+  traversal mode.  For example, ``argumentCountIs()`` matches the number of
+  arguments written in the source, ignoring default arguments represented
+  by ``CXXDefaultArgExpr`` nodes.
+
+- Improvements in AST Matchers allow more matching of template declarations,
+  independent of their template instantations.
 
 clang-format
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 678c259 - PR44325 (and duplicates): don't issue -Wzero-as-null-pointer-constant

2021-02-03 Thread Richard Smith via llvm-branch-commits

Author: Richard Smith
Date: 2021-02-03T14:59:48-08:00
New Revision: 678c259d277135ef32861887a8ac8618deba5f24

URL: 
https://github.com/llvm/llvm-project/commit/678c259d277135ef32861887a8ac8618deba5f24
DIFF: 
https://github.com/llvm/llvm-project/commit/678c259d277135ef32861887a8ac8618deba5f24.diff

LOG: PR44325 (and duplicates): don't issue -Wzero-as-null-pointer-constant
when rewriting 'a < b' as '(a <=> b) < 0'.

It's pretty common for comparison category types to use a pointer or
pointer-to-member type as their '0' parameter.

(cherry picked from commit 1f06f41993b6363e6b2c4f22a13488a3e687f31b)

Added: 


Modified: 
clang/lib/Sema/Sema.cpp
clang/test/SemaCXX/cxx2a-three-way-comparison.cpp

Removed: 




diff  --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 55cb3aee6194..cb5a84a31235 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -537,6 +537,13 @@ void Sema::diagnoseZeroToNullptrConversion(CastKind Kind, 
const Expr* E) {
   if (E->IgnoreParenImpCasts()->getType()->isNullPtrType())
 return;
 
+  // Don't diagnose the conversion from a 0 literal to a null pointer argument
+  // in a synthesized call to operator<=>.
+  if (!CodeSynthesisContexts.empty() &&
+  CodeSynthesisContexts.back().Kind ==
+  CodeSynthesisContext::RewritingOperatorAsSpaceship)
+return;
+
   // If it is a macro from system header, and if the macro name is not "NULL",
   // do not warn.
   SourceLocation MaybeMacroLoc = E->getBeginLoc();

diff  --git a/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp 
b/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp
index 353360e052bb..b94225274fff 100644
--- a/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp
+++ b/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++2a -verify %s
+// RUN: %clang_cc1 -std=c++2a -verify %s -Wzero-as-null-pointer-constant
 
 // Keep this test before any declarations of operator<=>.
 namespace PR44786 {
@@ -40,3 +40,21 @@ namespace PR47893 {
   int (...);
   int  = f(A(), A());
 }
+
+namespace PR44325 {
+  struct cmp_cat {};
+  bool operator<(cmp_cat, void*);
+  bool operator>(cmp_cat, int cmp_cat::*);
+
+  struct X {};
+  cmp_cat operator<=>(X, X);
+
+  bool b1 = X() < X(); // no warning
+  bool b2 = X() > X(); // no warning
+
+  // FIXME: It's not clear whether warning here is useful, but we can't really
+  // tell that this is a comparison category in general. This is probably OK,
+  // as comparisons against zero are only really intended for use in the
+  // implicit rewrite rules, not for explicit use by programs.
+  bool c = cmp_cat() < 0; // expected-warning {{zero as null pointer constant}}
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [openmp] 922e414 - [OpenMP] Fix seg fault in libomptarget when using Info with multiple threads

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Joseph Huber
Date: 2021-02-03T14:25:51-08:00
New Revision: 922e4149d16754b54ce225faa3e769d32937d7ad

URL: 
https://github.com/llvm/llvm-project/commit/922e4149d16754b54ce225faa3e769d32937d7ad
DIFF: 
https://github.com/llvm/llvm-project/commit/922e4149d16754b54ce225faa3e769d32937d7ad.diff

LOG: [OpenMP] Fix seg fault in libomptarget when using Info with multiple 
threads

Summary:
One option for the LIBOMPTARGET_INFO environment variable is to print the 
current status of the device's data mappings. These are a shared resource among 
threads so this needs to be protected when using multiple streams.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95786

(cherry picked from commit fda48539988d2a1bdb6395799151e9090312a20b)

Added: 


Modified: 
openmp/libomptarget/src/interface.cpp
openmp/libomptarget/src/private.h

Removed: 




diff  --git a/openmp/libomptarget/src/interface.cpp 
b/openmp/libomptarget/src/interface.cpp
index cf6d36960c75c..01f3715d6bcc8 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -58,7 +58,7 @@ static void HandleTargetOutcome(bool success, ident_t *loc = 
nullptr) {
   case tgt_mandatory:
 if (!success) {
   if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
-for (const auto &Device : PM->Devices)
+for (auto &Device : PM->Devices)
   dumpTargetPointerMappings(loc, Device);
   else
 FAILURE_MESSAGE("Run with LIBOMPTARGET_DEBUG=%d to dump host-target "
@@ -76,7 +76,7 @@ static void HandleTargetOutcome(bool success, ident_t *loc = 
nullptr) {
   1, "failure of target construct while offloading is mandatory");
 } else {
   if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
-for (const auto &Device : PM->Devices)
+for (auto &Device : PM->Devices)
   dumpTargetPointerMappings(loc, Device);
 }
 break;

diff  --git a/openmp/libomptarget/src/private.h 
b/openmp/libomptarget/src/private.h
index fb6f681d3020c..3b0e57dfe15ed 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -99,7 +99,7 @@ int __kmpc_get_target_offload(void) __attribute__((weak));
 

 /// dump a table of all the host-target pointer pairs on failure
 static inline void dumpTargetPointerMappings(const ident_t *Loc,
- const DeviceTy &Device) {
+ DeviceTy &Device) {
   if (Device.HostDataToTargetMap.empty())
 return;
 
@@ -109,6 +109,7 @@ static inline void dumpTargetPointerMappings(const ident_t 
*Loc,
Kernel.getFilename(), Kernel.getLine(), Kernel.getColumn());
   INFO(OMP_INFOTYPE_ALL, Device.DeviceID, "%-18s %-18s %s %s %s\n", "Host Ptr",
"Target Ptr", "Size (B)", "RefCount", "Declaration");
+  Device.DataMapMtx.lock();
   for (const auto &HostTargetMap : Device.HostDataToTargetMap) {
 SourceInfo Info(HostTargetMap.HstPtrName);
 INFO(OMP_INFOTYPE_ALL, Device.DeviceID,
@@ -118,6 +119,7 @@ static inline void dumpTargetPointerMappings(const ident_t 
*Loc,
  HostTargetMap.getRefCount(), Info.getName(), Info.getFilename(),
  Info.getLine(), Info.getColumn());
   }
+  Device.DataMapMtx.unlock();
 }
 
 




___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [openmp] 255f739 - [OpenMP][NFC] Added release note for new `deviceRTLs` and hidden helper task

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Shilei Tian
Date: 2021-02-03T14:25:11-08:00
New Revision: 255f7398845a7cfb47aef53e40b68057ec56839e

URL: 
https://github.com/llvm/llvm-project/commit/255f7398845a7cfb47aef53e40b68057ec56839e
DIFF: 
https://github.com/llvm/llvm-project/commit/255f7398845a7cfb47aef53e40b68057ec56839e.diff

LOG: [OpenMP][NFC] Added release note for new `deviceRTLs` and hidden helper 
task

Added release note for new `deviceRTLs` and hidden helper task for LLVM
12.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95584

(cherry picked from commit 7bc31018f71cac22b7060c49cefb6f3d0d2e2069)

Added: 


Modified: 
openmp/docs/ReleaseNotes.rst

Removed: 




diff  --git a/openmp/docs/ReleaseNotes.rst b/openmp/docs/ReleaseNotes.rst
index 7f40d3c81510c..cb3464ad84f06 100644
--- a/openmp/docs/ReleaseNotes.rst
+++ b/openmp/docs/ReleaseNotes.rst
@@ -7,7 +7,7 @@ OpenMP 12.0.0 Release Notes
These are in-progress notes for the upcoming LLVM 12.0.0 release.
Release notes for previous releases can be found on
`the Download Page <https://releases.llvm.org/download.html>`_.
-   
+
 
 Introduction
 
@@ -44,3 +44,27 @@ Non-comprehensive list of changes in this release
   ``LIBOMPTARGET_INFO`` allows the user to request certain information from the
   ``libomptarget`` runtime using a 32-bit field. A full description of each
   environment variable is described :ref:`here 
`.
+
+- ``target nowait`` was supported via hidden helper task, which is a task not
+  bound to any parallel region. A hidden helper team with a number of threads 
is
+  created when the first hidden helper task is encountered. The number of 
threads
+  can be configured via the environment variable
+  ``LIBOMP_NUM_HIDDEN_HELPER_THREADS``. By default it is 8. If
+  ``LIBOMP_NUM_HIDDEN_HELPER_THREADS=0``, hidden helper task is disabled and
+  falls back to a regular OpenMP task. It can also be disabled by setting the
+  environment variable ``LIBOMP_USE_HIDDEN_HELPER_TASK=OFF``.
+
+- ``deviceRTLs`` for NVPTX platform is CUDA free now. It is generally OpenMP 
code.
+  Target dependent parts are implemented with Clang/LLVM/NVVM intrinsics. CUDA
+  SDK is also dropped as a dependence to build the device runtime, which means
+  device runtime can also be built on a CUDA free system. However, it is
+  disabled by default. Set the CMake variable
+  ``LIBOMPTARGET_BUILD_NVPTX_BCLIB=ON`` to enable the build of NVPTX device
+  runtime on a CUDA free system. ``gcc-multilib`` and ``g++-multilib`` are
+  required. If CUDA is found, the device runtime will be built by default.
+
+  - Static NVPTX device runtime library (``libomptarget-nvptx.a``) was dropped.
+  A bitcode library is required to build an OpenMP program. If the library is
+  not found in the default path or any of the paths defined by 
``LIBRARY_PATH``,
+  an error will be raised. User can also specify the path to the bitcode device
+  library via ``--libomptarget-nvptx-bc-path=``.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [openmp] 5d926bb - [OpenMP][deviceRTLs] Added `[[clang::loader_uninitialized]]` explicitly

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Shilei Tian
Date: 2021-02-03T14:25:01-08:00
New Revision: 5d926bb3c46848c704833e0f02884395609388a3

URL: 
https://github.com/llvm/llvm-project/commit/5d926bb3c46848c704833e0f02884395609388a3
DIFF: 
https://github.com/llvm/llvm-project/commit/5d926bb3c46848c704833e0f02884395609388a3.diff

LOG: [OpenMP][deviceRTLs] Added `[[clang::loader_uninitialized]]` explicitly

`[[clang::loader_uninitialized]]` is in macro `SHARED` but it doesn't
work for array like `parallelLevel`, so the variable will be zero initialized.
There is also a similar issue for `omptarget_nvptx_device_State` which is in
global address space. Its c'tor is also generated, which was not in the past 
when
building the `deviceRTLs` with CUDA. In this patch, we added the attribute to
the two variables explicitly.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95550

(cherry picked from commit 19248d30e4ed5250fa84abbbd52fc7b835918a45)

Added: 


Modified: 
openmp/libomptarget/deviceRTLs/common/src/omp_data.cu

Removed: 




diff  --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu 
b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
index b91afd7476fea..4736d07108e03 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
@@ -25,7 +25,8 @@ DEVICE omptarget_device_environmentTy 
omptarget_device_environment;
 // global data holding OpenMP state information
 

 
-DEVICE
+// OpenMP will try to call its ctor if we don't add the attribute explicitly
+[[clang::loader_uninitialized]] DEVICE
 omptarget_nvptx_Queue
 omptarget_nvptx_device_State[MAX_SM];
 
@@ -33,7 +34,9 @@ DEVICE omptarget_nvptx_SimpleMemoryManager 
omptarget_nvptx_simpleMemoryManager;
 DEVICE uint32_t SHARED(usedMemIdx);
 DEVICE uint32_t SHARED(usedSlotIdx);
 
-DEVICE uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
+// SHARED doesn't work with array so we add the attribute explicitly.
+[[clang::loader_uninitialized]] DEVICE uint8_t
+parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
 #pragma omp allocate(parallelLevel) allocator(omp_pteam_mem_alloc)
 DEVICE uint16_t SHARED(threadLimit);
 DEVICE uint16_t SHARED(threadsInTeam);



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [openmp] 4d0874c - [OpenMP][NVPTX] Added the missing -O1 when building NVPTX bitcode libraries

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Shilei Tian
Date: 2021-02-03T14:24:51-08:00
New Revision: 4d0874c72a0a3f53eb3084a1ea3ee4456ab6e004

URL: 
https://github.com/llvm/llvm-project/commit/4d0874c72a0a3f53eb3084a1ea3ee4456ab6e004
DIFF: 
https://github.com/llvm/llvm-project/commit/4d0874c72a0a3f53eb3084a1ea3ee4456ab6e004.diff

LOG: [OpenMP][NVPTX] Added the missing -O1 when building NVPTX bitcode libraries

In the past `-O1` was used when building NVPTX bitcode libraries. After
we switched to OpenMP, `-O1` was missing by mistake, leading to a huge 
performance
regression.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D95545

(cherry picked from commit 5a64794bbad4010778406dfee7748e6080258dbf)

Added: 


Modified: 
openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt

Removed: 




diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt 
b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 23efbba29d66..eeda137ef120 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -126,14 +126,14 @@ set(cuda_src_files
 )
 
 # Set flags for LLVM Bitcode compilation.
-set(bc_flags -S -x c++
-  -target nvptx64
-  -Xclang -emit-llvm-bc
-  -Xclang -aux-triple -Xclang ${aux_triple}
-  -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
-  -D__CUDACC__
-  -I${devicertl_base_directory}
-  -I${devicertl_nvptx_directory}/src)
+set(bc_flags -S -x c++ -O1 -std=c++14
+ -target nvptx64
+ -Xclang -emit-llvm-bc
+ -Xclang -aux-triple -Xclang ${aux_triple}
+ -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
+ -D__CUDACC__
+ -I${devicertl_base_directory}
+ -I${devicertl_nvptx_directory}/src)
 
 if(${LIBOMPTARGET_NVPTX_DEBUG})
   list(APPEND bc_flags -DOMPTARGET_NVPTX_DEBUG=-1)



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [openmp] 12b6579 - [OpenMP][Libomptarget] Fix conditional in CMake for remote plugin

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Atmn Patel
Date: 2021-02-03T14:23:22-08:00
New Revision: 12b6579b79dc21e9e54e74520ece0d571a640d4b

URL: 
https://github.com/llvm/llvm-project/commit/12b6579b79dc21e9e54e74520ece0d571a640d4b
DIFF: 
https://github.com/llvm/llvm-project/commit/12b6579b79dc21e9e54e74520ece0d571a640d4b.diff

LOG: [OpenMP][Libomptarget] Fix conditional in CMake for remote plugin

The remote offloading plugin's CMakeLists was trying to build if its
flag was enabled even if it didn't find gRPC/protobuf. The conditional
was wrong, it's fixed by this.

Differential Revision: https://reviews.llvm.org/D95574

(cherry picked from commit 8a77056256d9970387595a5c729d894e3fe07131)

Added: 


Modified: 
openmp/libomptarget/plugins/remote/CMakeLists.txt

Removed: 




diff  --git a/openmp/libomptarget/plugins/remote/CMakeLists.txt 
b/openmp/libomptarget/plugins/remote/CMakeLists.txt
index 1baa1125f44ca..989c74642c66e 100644
--- a/openmp/libomptarget/plugins/remote/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/remote/CMakeLists.txt
@@ -42,12 +42,13 @@ if (Protobuf_FOUND AND gRPC_FOUND AND PROTOC AND 
GRPC_CPP_PLUGIN)
   set(GRPC_INCLUDE_DIR
   ${directory}
   )
+
+  set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/)
+  set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/)
+  
+  add_subdirectory(src)
+  add_subdirectory(server)
 else()
   libomptarget_say("Not building remote offloading plugin: required libraries 
were not found.")
 endif()
 
-set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/)
-set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/)
-
-add_subdirectory(src)
-add_subdirectory(server)



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] e2d822c - [elfabi] Fix tests which failed on different timezones

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Haowei Wu
Date: 2021-02-03T14:18:00-08:00
New Revision: e2d822c3bdf6388c6ef21f35745105aba064d16d

URL: 
https://github.com/llvm/llvm-project/commit/e2d822c3bdf6388c6ef21f35745105aba064d16d
DIFF: 
https://github.com/llvm/llvm-project/commit/e2d822c3bdf6388c6ef21f35745105aba064d16d.diff

LOG: [elfabi] Fix tests which failed on different timezones

This patch fixes elfabi tests on machines using a GMT+X timezone
setting.

Differential Revision: https://reviews.llvm.org/D95641

(cherry picked from commit 771b35965457ebd5faaed8a1c3d2bcefffe721a3)

Added: 


Modified: 
llvm/test/tools/llvm-elfabi/preserve-dates-stub.test
llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test

Removed: 




diff  --git a/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test 
b/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test
index c399029e0337..9742a61aa281 100644
--- a/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test
+++ b/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test
@@ -1,9 +1,9 @@
 ## Test writing unchanged content to ELF Stub file with --write-if-changed 
flag.
 
 # RUN: llvm-elfabi %s --output-target=elf64-little %t
-# RUN: touch -m -t 19700101 %t
+# RUN: env TZ=GMT touch -m -t 19700101 %t
 # RUN: llvm-elfabi %s --output-target=elf64-little %t --write-if-changed
-# RUN: ls -l %t | FileCheck %s
+# RUN: env TZ=GMT ls -l %t | FileCheck %s
 
 --- !tapi-tbe
 TbeVersion: 1.0

diff  --git a/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test 
b/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test
index 89cad7733eee..3ec190067c73 100644
--- a/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test
+++ b/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test
@@ -1,8 +1,8 @@
 ## Test writing unchanged content to TBE file with --write-if-changed flag.
 
 # RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=%t
-# RUN: touch -m -t 19700101 %t
+# RUN: env TZ=GMT touch -m -t 19700101 %t
 # RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=%t --write-if-changed
-# RUN: ls -l %t | FileCheck %s
+# RUN: env TZ=GMT ls -l %t | FileCheck %s
 
 # CHECK: {{[[:space:]]1970}}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] b15f3fc - [X86] Fix disassembly of x86-64 GDTLS code sequence

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Andrew Ng
Date: 2021-02-03T14:16:46-08:00
New Revision: b15f3fc5c71dc8a9db7e931e2922a065293e4a64

URL: 
https://github.com/llvm/llvm-project/commit/b15f3fc5c71dc8a9db7e931e2922a065293e4a64
DIFF: 
https://github.com/llvm/llvm-project/commit/b15f3fc5c71dc8a9db7e931e2922a065293e4a64.diff

LOG: [X86] Fix disassembly of x86-64 GDTLS code sequence

For x86-64 the REX.w prefix takes precedence over any other size
override (i.e. 0x66). Therefore, for x86-64 when REX.w is present set
'hasOpSize' to false to ensure that any size override is ignored.

Fixes PR48901.

Differential Revision: https://reviews.llvm.org/D95682

(cherry picked from commit 94fedd266125a5425aa33e11332bf414f0b6dc35)

Added: 
llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s

Modified: 
llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
llvm/test/MC/Disassembler/X86/x86-64.txt

Removed: 




diff  --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp 
b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 05e482a6b66e..4e6d8e8e1a54 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -492,6 +492,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
   insn->addressSize = (insn->hasAdSize ? 4 : 8);
   insn->displacementSize = 4;
   insn->immediateSize = 4;
+  insn->hasOpSize = false;
 } else {
   insn->registerSize = (insn->hasOpSize ? 2 : 4);
   insn->addressSize = (insn->hasAdSize ? 4 : 8);

diff  --git a/llvm/test/MC/Disassembler/X86/x86-64.txt 
b/llvm/test/MC/Disassembler/X86/x86-64.txt
index d91ef2500d99..5e56d4c796e6 100644
--- a/llvm/test/MC/Disassembler/X86/x86-64.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -329,8 +329,10 @@
 # CHECK: callw 32767
 0x66 0xe8 0xff 0x7f
 
-# CHECK: callw 32767
-0x66 0x66 0x48 0xe8 0xff 0x7f
+# TODO: Should display data16 prefixes.
+# CHECK-NOT: data16
+# CHECK: callq 32767
+0x66 0x66 0x48 0xe8 0xff 0x7f 0x00 0x00
 
 # CHECK: jmp -32769
 0xe9 0xff 0x7f 0xff 0xff
@@ -338,8 +340,10 @@
 # CHECK: jmp 32767
 0x66 0xe9 0xff 0x7f
 
+# TODO: Should display data16 prefixes.
+# CHECK-NOT: data16
 # CHECK: jmp 32767
-0x66 0x66 0x48 0xe9 0xff 0x7f
+0x66 0x66 0x48 0xe9 0xff 0x7f 0x00 0x00
 
 # CHECK: jo -32769
 0x0f 0x80 0xff 0x7f 0xff 0xff

diff  --git a/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s 
b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s
new file mode 100644
index 000000000000..e913f5f6a345
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -filetype=obj -triple=x86_64 | llvm-objdump -d - | FileCheck 
%s
+
+# CHECK: 0000000000000000 <PR48901>:
+# TODO: Should display data16 prefixes.
+# CHECK-NEXT: 0: 66 48 8d 3d 00 00 00 00   leaq (%rip), %rdi  # 8 <PR48901+0x8>
+# CHECK-NEXT: 8: 66 66 48 e8 00 00 00 00   callq   0x10 <PR48901+0x10>
+# CHECK-EMPTY:
+
+PR48901:
+ data16
+ leaq   bar@TLSGD(%rip),%rdi
+ data16
+ data16
+ rex64
+ callq  __tls_get_addr@PLT
+
+.section .tdata,"awT",@progbits
+bar:
+.long 42



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] c5904f5 - [LV] Fix crash when computing max VF too early

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Cullen Rhodes
Date: 2021-02-03T14:12:44-08:00
New Revision: c5904f5c9d32e563e2898e1242d5818e488fe2ee

URL: 
https://github.com/llvm/llvm-project/commit/c5904f5c9d32e563e2898e1242d5818e488fe2ee
DIFF: 
https://github.com/llvm/llvm-project/commit/c5904f5c9d32e563e2898e1242d5818e488fe2ee.diff

LOG: [LV] Fix crash when computing max VF too early

D90687 introduced a crash:

  llvm::LoopVectorizationCostModel::computeMaxVF(llvm::ElementCount, unsigned 
int):
Assertion `WideningDecisions.empty() && Uniforms.empty() && Scalars.empty() 
&&
"No decisions should have been taken at this point"' failed.

when compiling the following C code:

  typedef struct {
  char a;
  } b;

  b *c;
  int d, e;

  int f() {
int g = 0;
for (; d; d++) {
  e = 0;
  for (; e < c[d].a; e++)
g++;
}
return g;
  }

with:

  clang -Os -target hexagon -mhvx -fvectorize -mv67 testcase.c -S -o -

This occurred since prior to D90687 computeFeasibleMaxVF would only be
called in computeMaxVF when a scalar epilogue was allowed, but now it's
always called. This causes the assert above since computeFeasibleMaxVF
collects all viable VFs larger than the default MaxVF, and for each VF
calculates the register usage which results in analysis being done the
assert above guards against. This can occur in computeFeasibleMaxVF if
TTI.shouldMaximizeVectorBandwidth and this target hook is implemented in
the hexagon backend to always return true.

Reported by @iajbar.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D94869

(cherry picked from commit 8cda227432f1c9ceb63b88802ed8136da97274f1)

Added: 
llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll

Modified: 
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ea0d7673edf6..47635dbdda02 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5504,11 +5504,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount 
UserVF, unsigned UserIC) {
 return None;
   }
 
-  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
-
   switch (ScalarEpilogueStatus) {
   case CM_ScalarEpilogueAllowed:
-return MaxVF;
+return computeFeasibleMaxVF(TC, UserVF);
   case CM_ScalarEpilogueNotAllowedUsePredicate:
 LLVM_FALLTHROUGH;
   case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5546,7 +5544,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount 
UserVF, unsigned UserIC) {
   LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
"scalar epilogue instead.\n");
   ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
-  return MaxVF;
+  return computeFeasibleMaxVF(TC, UserVF);
 }
 return None;
   }
@@ -5563,6 +5561,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount 
UserVF, unsigned UserIC) {
 InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }
 
+  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
   assert(!MaxVF.isScalable() &&
  "Scalable vectors do not yet support tail folding");
   assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&

diff  --git a/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll 
b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
new file mode 100644
index 000000000000..5f8c5d329edf
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
@@ -0,0 +1,29 @@
+; RUN: opt -march=hexagon -hexagon-autohvx -loop-vectorize -S < %s 2>&1 | 
FileCheck %s
+
+; Check that we don't crash.
+
+; CHECK-LABEL: @f
+; CHECK: vector.body
+
+target datalayout = 
"e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+; Function Attrs: optsize
+define i32 @f() #0 {
+entry:
+  br label %loop
+
+loop:
+  %g.016 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %loop ]
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %0 = load i8, i8* undef, align 1
+  %g.1.lcssa = add i32 %g.016, undef
+  %iv.next = add nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, 0
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret i32 %g.1.lcssa
+}
+
+attributes #0 = { optsize "target-features"="+hvx-length128b" }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] c738c8a - [RISCV] Update the version number to v0.10 for vector.

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Hsiangkai Wang
Date: 2021-02-03T14:08:49-08:00
New Revision: c738c8aa9bf387cc960feca81bc5263e8c634e15

URL: 
https://github.com/llvm/llvm-project/commit/c738c8aa9bf387cc960feca81bc5263e8c634e15
DIFF: 
https://github.com/llvm/llvm-project/commit/c738c8aa9bf387cc960feca81bc5263e8c634e15.diff

LOG: [RISCV]  Update the version number to v0.10 for vector.

(cherry picked from commit 9847023660467a4469b5667bcf7a4c73a4780037)

Added: 


Modified: 
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Removed: 




diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 06e4d053d5d7..9fdfc2727d86 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -7,7 +7,7 @@
 
//===----------------------------------------------------------------------===//
 ///
 /// This file contains the required infrastructure to support code generation
-/// for the standard 'V' (Vector) extension, version 0.9.  This version is 
still
+/// for the standard 'V' (Vector) extension, version 0.10.  This version is 
still
 /// experimental as the 'V' extension hasn't been ratified yet.
 ///
 /// This file is included from RISCVInstrInfoV.td

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index aea3d0e17ccc..79a1e6ddc8a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -8,7 +8,7 @@
 ///
 /// This file contains the required infrastructure and SDNode patterns to
 /// support code generation for the standard 'V' (Vector) extension, version
-/// 0.9.  This version is still experimental as the 'V' extension hasn't been
+/// 0.10.  This version is still experimental as the 'V' extension hasn't been
 /// ratified yet.
 ///
 /// This file is included from and depends upon RISCVInstrInfoVPseudos.td



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 237b39a - [RISCV] Update the version number to v0.10 for vector.

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Hsiangkai Wang
Date: 2021-02-03T14:08:49-08:00
New Revision: 237b39a02f38b4903f39fef362d0f5e98e1de194

URL: 
https://github.com/llvm/llvm-project/commit/237b39a02f38b4903f39fef362d0f5e98e1de194
DIFF: 
https://github.com/llvm/llvm-project/commit/237b39a02f38b4903f39fef362d0f5e98e1de194.diff

LOG: [RISCV] Update the version number to v0.10 for vector.

v0.10 is tagged in V specification. Update the version to v0.10.

Differential Revision: https://reviews.llvm.org/D95680

(cherry picked from commit 282aca10aeb03bdaef0a8d4f3faa4c2ff236e527)

Added: 


Modified: 
clang/lib/Basic/Targets/RISCV.cpp
clang/lib/Driver/ToolChains/Arch/RISCV.cpp
clang/test/Driver/riscv-arch.c
clang/test/Preprocessor/riscv-target-features.c
llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
llvm/lib/Target/RISCV/RISCVInstrInfoV.td
llvm/test/CodeGen/RISCV/attributes.ll
llvm/test/MC/RISCV/attribute-arch.s

Removed: 




diff  --git a/clang/lib/Basic/Targets/RISCV.cpp 
b/clang/lib/Basic/Targets/RISCV.cpp
index 0bf02e605740..786201ea340d 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -150,7 +150,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   }
 
   if (HasV) {
-Builder.defineMacro("__riscv_v", "100");
+Builder.defineMacro("__riscv_v", "1");
 Builder.defineMacro("__riscv_vector");
   }
 
@@ -191,10 +191,10 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions 
&Opts,
 Builder.defineMacro("__riscv_zfh", "1000");
 
   if (HasZvamo)
-Builder.defineMacro("__riscv_zvamo", "100");
+Builder.defineMacro("__riscv_zvamo", "1");
 
   if (HasZvlsseg)
-Builder.defineMacro("__riscv_zvlsseg", "100");
+Builder.defineMacro("__riscv_zvlsseg", "1");
 }
 
 /// Return true if has this feature, need to sync with handleTargetFeatures.

diff  --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp 
b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index ffae47e5672e..c7f2a3ea5e02 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -63,7 +63,7 @@ isExperimentalExtension(StringRef Ext) {
   Ext == "zbr" || Ext == "zbs" || Ext == "zbt" || Ext == "zbproposedc")
 return RISCVExtensionVersion{"0", "93"};
   if (Ext == "v" || Ext == "zvamo" || Ext == "zvlsseg")
-return RISCVExtensionVersion{"1", "0"};
+return RISCVExtensionVersion{"0", "10"};
   if (Ext == "zfh")
 return RISCVExtensionVersion{"0", "1"};
   return None;

diff  --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c
index 3762a4aef1b3..cf148ca885d0 100644
--- a/clang/test/Driver/riscv-arch.c
+++ b/clang/test/Driver/riscv-arch.c
@@ -384,7 +384,7 @@
 // RV32-EXPERIMENTAL-V-BADVERS: error: invalid arch name 'rv32iv0p1'
 // RV32-EXPERIMENTAL-V-BADVERS: unsupported version number 0.1 for 
experimental extension
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0 
-menable-experimental-extensions -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10 
-menable-experimental-extensions -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s
 // RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v"
 
@@ -412,7 +412,7 @@
 // RV32-EXPERIMENTAL-ZVAMO-BADVERS: error: invalid arch name 'rv32izvamo0p1'
 // RV32-EXPERIMENTAL-ZVAMO-BADVERS: unsupported version number 0.1 for 
experimental extension
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32izvamo1p0 
-menable-experimental-extensions -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32izvamo0p10 
-menable-experimental-extensions -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVAMO-GOODVERS %s
 // RV32-EXPERIMENTAL-ZVAMO-GOODVERS: "-target-feature" "+experimental-zvamo"
 
@@ -431,6 +431,6 @@
 // RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: error: invalid arch name 
'rv32izvlsseg0p1'
 // RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: unsupported version number 0.1 for 
experimental extension
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32izvlsseg1p0 
-menable-experimental-extensions -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32izvlsseg0p10 
-menable-experimental-extensions -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS %s
 // RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS: "-target-feature" 
"+experimental-zvlsseg"

diff  --git a/clang/test/Preprocessor/riscv-target-features.c 
b/clang/test/Preprocessor/riscv-target-features.c
index 006395505246..88826bbd60b8 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -110,23 +110,23 @@
 // CHECK-DOUBLE-NOT: __riscv_float_abi_single
 
 // RUN: %clang -target riscv32-unknown-linux-gnu 

[llvm-branch-commits] [llvm] dfb7633 - [PowerPC][Power10] Fix XXSPLI32DX not correctly exploiting specific cases

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Albion Fung
Date: 2021-02-03T14:07:33-08:00
New Revision: dfb763363bc560769605e37e96c1d13cb236223d

URL: 
https://github.com/llvm/llvm-project/commit/dfb763363bc560769605e37e96c1d13cb236223d
DIFF: 
https://github.com/llvm/llvm-project/commit/dfb763363bc560769605e37e96c1d13cb236223d.diff

LOG: [PowerPC][Power10] Fix XXSPLI32DX not correctly exploiting specific cases

Some cases may be transformed into 32 bit splats before hitting the boolean 
statement, which may cause incorrect behaviour and provide XXSPLTI32DX with the 
incorrect values of splat. The condition was reversed so that the shortcut 
prevents this problem.

Differential Revision: https://reviews.llvm.org/D95634

(cherry picked from commit 2e470e03b49f1d79ebc315ca9d62a690a633c0cd)

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/p10-splatImm32.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 663ee15db11e..929a72ac687e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8604,16 +8604,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
   // If it is a splat of a double, check if we can shrink it to a 32 bit
   // non-denormal float which when converted back to double gives us the same
-  // double. This is to exploit the XXSPLTIDP instruction.+  // If we lose 
precision, we use XXSPLTI32DX.
+  // double. This is to exploit the XXSPLTIDP instruction.
+  // If we lose precision, we use XXSPLTI32DX.
   if (BVNIsConstantSplat && (SplatBitSize == 64) &&
   Subtarget.hasPrefixInstrs()) {
-if (convertToNonDenormSingle(APSplatBits) &&
-(Op->getValueType(0) == MVT::v2f64)) {
+// Check the type first to short-circuit so we don't modify APSplatBits if
+// this block isn't executed.
+if ((Op->getValueType(0) == MVT::v2f64) &&
+convertToNonDenormSingle(APSplatBits)) {
   SDValue SplatNode = DAG.getNode(
   PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
   DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
   return DAG.getBitcast(Op.getValueType(), SplatNode);
-} else if (APSplatBits.getBitWidth() == 64) {
+} else {
   // We may lose precision, so we have to use XXSPLTI32DX.
 
   uint32_t Hi =

diff  --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll 
b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
index 081cae729acf..ce4c2da24b0d 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
@@ -101,23 +101,11 @@ entry:
   ret <8 x i16> 
 }
 
-define dso_local <16 x i8> @test_xxsplti32dx_10() {
-; CHECK-LABEL: test_xxsplti32dx_10:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:xxlxor vs34, vs34, vs34
-; CHECK-NEXT:xxsplti32dx vs34, 0, 1207959552
-; CHECK-NEXT:blr
-entry:
-  ret <16 x i8> 
-}
-
-; FIXME: It appears that there is something wrong with the computation
-;of the 64-bit constant to splat so we cannot emit xxsplti32dx for
-;this test case for now.
 define dso_local <16 x i8> @constSplatBug() {
 ; CHECK-LABEL: constSplatBug:
 ; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:plxv vs34, .LCPI10_0@PCREL(0), 1
+; CHECK-NEXT:xxlxor vs34, vs34, vs34
+; CHECK-NEXT:xxsplti32dx vs34, 0, 1191182336
 ; CHECK-NEXT:blr
 entry:
   ret <16 x i8> 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] b351efc - [PowerPC] Do not emit XXSPLTI32DX for sub 64-bit constants

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Nemanja Ivanovic
Date: 2021-02-03T14:07:33-08:00
New Revision: b351efcae08a59c0cafa123a92b24c5f2300202b

URL: 
https://github.com/llvm/llvm-project/commit/b351efcae08a59c0cafa123a92b24c5f2300202b
DIFF: 
https://github.com/llvm/llvm-project/commit/b351efcae08a59c0cafa123a92b24c5f2300202b.diff

LOG: [PowerPC] Do not emit XXSPLTI32DX for sub 64-bit constants

If the APInt returned by BuildVectorSDNode::isConstantSplat() is narrower than
64 bits, the result produced by XXSPLTI32DX is incorrect. The result returned
by the function appears to be incorrect and we'll investigate/fix it in a
follow-up commit. However, since this causes miscompiles, we must
temporarily disable emitting this instruction for such values.

(cherry picked from commit 54e570d94af995ff58287a8288389641910a8239)

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/p10-splatImm32.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9215c17cb94b..663ee15db11e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8613,7 +8613,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
   DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
   return DAG.getBitcast(Op.getValueType(), SplatNode);
-} else { // We may lose precision, so we have to use XXSPLTI32DX.
+} else if (APSplatBits.getBitWidth() == 64) {
+  // We may lose precision, so we have to use XXSPLTI32DX.
 
   uint32_t Hi =
   (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 
32);

diff  --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll 
b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
index 420a96dc1495..081cae729acf 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
@@ -100,3 +100,25 @@ define dso_local <8 x i16> @test_xxsplti32dx_9() {
 entry:
   ret <8 x i16> 
 }
+
+define dso_local <16 x i8> @test_xxsplti32dx_10() {
+; CHECK-LABEL: test_xxsplti32dx_10:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:xxlxor vs34, vs34, vs34
+; CHECK-NEXT:xxsplti32dx vs34, 0, 1207959552
+; CHECK-NEXT:blr
+entry:
+  ret <16 x i8> 
+}
+
+; FIXME: It appears that there is something wrong with the computation
+;of the 64-bit constant to splat so we cannot emit xxsplti32dx for
+;this test case for now.
+define dso_local <16 x i8> @constSplatBug() {
+; CHECK-LABEL: constSplatBug:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:plxv vs34, .LCPI10_0@PCREL(0), 1
+; CHECK-NEXT:blr
+entry:
+  ret <16 x i8> 
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] e3658ce - [VE] Change inetger constants 32-bit friendly

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Kazushi (Jam) Marukawa
Date: 2021-02-03T14:04:14-08:00
New Revision: e3658cefc5bc3538d05fc8ef058d83bcd24b785a

URL: 
https://github.com/llvm/llvm-project/commit/e3658cefc5bc3538d05fc8ef058d83bcd24b785a
DIFF: 
https://github.com/llvm/llvm-project/commit/e3658cefc5bc3538d05fc8ef058d83bcd24b785a.diff

LOG: [VE] Change inetger constants 32-bit friendly

Correct integer constants like `1UL << 63` to `UINT64_C(1) << 63` in
order to make them work on 32-bit machines.  Tested on both an i386
and x86_64 machines.

Reviewed By: mgorny

Differential Revision: https://reviews.llvm.org/D95724

(cherry picked from commit 4648098f97fa2a7c08c04632c70cf29293528812)

Added: 


Modified: 
llvm/lib/Target/VE/VE.h

Removed: 




diff  --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
index a404f7ced70a..8c1fa840f19c 100644
--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -334,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) {
 return true;
   }
   // (m)1 patterns
-  return (Val & (1UL << 63)) && isShiftedMask_64(Val);
+  return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val);
 }
 
 inline static bool isMImm32Val(uint32_t Val) {
@@ -347,14 +347,14 @@ inline static bool isMImm32Val(uint32_t Val) {
 return true;
   }
   // (m)1 patterns
-  return (Val & (1 << 31)) && isShiftedMask_32(Val);
+  return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val);
 }
 
 /// val2MImm - Convert an integer immediate value to target MImm immediate.
 inline static uint64_t val2MImm(uint64_t Val) {
   if (Val == 0)
 return 0; // (0)1
-  if (Val & (1UL << 63))
+  if (Val & (UINT64_C(1) << 63))
 return countLeadingOnes(Val);   // (m)1
   return countLeadingZeros(Val) | 0x40; // (m)0
 }
@@ -364,8 +364,8 @@ inline static uint64_t mimm2Val(uint64_t Val) {
   if (Val == 0)
 return 0; // (0)1
   if ((Val & 0x40) == 0)
-return (uint64_t)((1L << 63) >> (Val & 0x3f)); // (m)1
-  return ((uint64_t)(-1L) >> (Val & 0x3f));// (m)0
+return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1
+  return ((uint64_t)INT64_C(-1) >> (Val & 0x3f));  // (m)0
 }
 
 inline unsigned M0(unsigned Val) { return Val + 64; }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [openmp] 0564dd9 - [OpenMP] Fix python3 compatibility in openmp's lit.cfg

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Tobias Hieta
Date: 2021-02-03T11:50:49-08:00
New Revision: 0564dd904bf7ef7758cb904ed8f7f2a1f915ef8d

URL: 
https://github.com/llvm/llvm-project/commit/0564dd904bf7ef7758cb904ed8f7f2a1f915ef8d
DIFF: 
https://github.com/llvm/llvm-project/commit/0564dd904bf7ef7758cb904ed8f7f2a1f915ef8d.diff

LOG: [OpenMP] Fix python3 compatibility in openmp's lit.cfg

Differential Revision: https://reviews.llvm.org/D95669

(cherry picked from commit c3c02d0d5a313272f6d35926bdf678fc6b884c02)

Added: 


Modified: 
openmp/runtime/test/lit.cfg

Removed: 




diff  --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg
index 0d4a6107ff2b..c4e5fe1ea9e0 100644
--- a/openmp/runtime/test/lit.cfg
+++ b/openmp/runtime/test/lit.cfg
@@ -76,7 +76,7 @@ if config.operating_system == 'Darwin':
   cmd = subprocess.Popen(['xcrun', '--show-sdk-path'],
  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   out, err = cmd.communicate()
-  out = out.strip()
+  out = out.strip().decode()
   res = cmd.wait()
   if res == 0 and out:
 config.test_flags += " -isysroot " + out



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 52a70a0 - [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps - don't merge VPERMILPD ops with different low/high masks.

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-02-03T11:26:33-08:00
New Revision: 52a70a07e93c322ad137bce1a1ff2f1c9fdf6050

URL: 
https://github.com/llvm/llvm-project/commit/52a70a07e93c322ad137bce1a1ff2f1c9fdf6050
DIFF: 
https://github.com/llvm/llvm-project/commit/52a70a07e93c322ad137bce1a1ff2f1c9fdf6050.diff

LOG: [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps - don't merge VPERMILPD 
ops with different low/high masks.

Unlike VPERMILPS, VPERMILPD can have non-repeating masks in each 128-bit 
subvector, we weren't accounting for this when folding 
vperm2f128(vpermilpd(x,c),vpermilpd(y,c)) -> vpermilpd(vperm2f128(x,y),c).

I'm intending to add support for this but wanted to get a minimal fix in first 
for merging into 12.xx.

Fixes PR48908

(cherry picked from commit 6663330bc8c84a75ea092272297b557bfc310380)

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0dd20235aa3c..6b816c710f98 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36916,11 +36916,18 @@ static SDValue 
canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
 Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res));
 return DAG.getBitcast(VT, Res);
   }
+  case X86ISD::VPERMILPI:
+// TODO: Handle v4f64 permutes with different low/high lane masks.
+if (SrcVT0 == MVT::v4f64) {
+  uint64_t Mask = Src0.getConstantOperandVal(1);
+  if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
+break;
+}
+LLVM_FALLTHROUGH;
   case X86ISD::VSHLI:
   case X86ISD::VSRLI:
   case X86ISD::VSRAI:
   case X86ISD::PSHUFD:
-  case X86ISD::VPERMILPI:
 if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
   SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
   SDValue RHS =

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 3da83b25d363..1a1153d0e886 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -442,16 +442,18 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, 
<4 x double> %v2, <4 x
 ; X86-AVX1-NEXT:movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX1-NEXT:movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX1-NEXT:movl {{[0-9]+}}(%esp), %edx
-; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
-; X86-AVX1-NEXT:vinsertf128 $1, %xmm2, %ymm1, %ymm4
-; X86-AVX1-NEXT:vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
-; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
+; X86-AVX1-NEXT:vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X86-AVX1-NEXT:vpermilpd {{.*#+}} ymm3 = ymm3[0,1,2,2]
+; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X86-AVX1-NEXT:vinsertf128 $1, %xmm2, %ymm1, %ymm5
+; X86-AVX1-NEXT:vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3]
 ; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
-; X86-AVX1-NEXT:vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
-; X86-AVX1-NEXT:vmovapd %ymm4, (%edx)
-; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
-; X86-AVX1-NEXT:vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
-; X86-AVX1-NEXT:vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1]
+; X86-AVX1-NEXT:vblendpd {{.*#+}} ymm3 = ymm5[0],ymm3[1],ymm5[2],ymm3[3]
+; X86-AVX1-NEXT:vmovapd %ymm3, (%edx)
+; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm3 = ymm4[2,3,0,1]
+; X86-AVX1-NEXT:vblendpd {{.*#+}} ymm4 = ymm4[0,1],ymm0[2],ymm4[3]
+; X86-AVX1-NEXT:vblendpd {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3]
 ; X86-AVX1-NEXT:vmovapd %ymm3, (%ecx)
 ; X86-AVX1-NEXT:vextractf128 $1, %ymm0, %xmm0
 ; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
@@ -513,16 +515,18 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, 
<4 x double> %v2, <4 x
 ;
 ; X64-AVX1-LABEL: PR48908:
 ; X64-AVX1:   # %bb.0:
-; X64-AVX1-NEXT:vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
-; X64-AVX1-NEXT:vinsertf128 $1, %xmm2, %ymm1, %ymm4
-; X64-AVX1-NEXT:vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
-; X64-AVX1-NEXT:vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
+; X64-AVX1-NEXT:vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X64-AVX1-NEXT:vpermilpd {{.*#+}} ymm3 = ymm3[0,1,2,2]
+; X64-AVX1-NEXT:vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X64-AVX1-NEXT:vinsertf128 $1, %xmm2, %ymm1, %ymm5
+; X64-AVX1-NEXT:vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3]
 ; X64-AVX1-NEXT:vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
-; X64-AVX1-NEXT:vshufpd {{.*#+}} ymm4 = 

[llvm-branch-commits] [llvm] c1899cd - [X86][AVX] Add PR48908 shuffle test case

2021-02-03 Thread Tom Stellard via llvm-branch-commits

Author: Simon Pilgrim
Date: 2021-02-03T11:26:33-08:00
New Revision: c1899cd5102dbdacd006fdb33db075319ccc933f

URL: 
https://github.com/llvm/llvm-project/commit/c1899cd5102dbdacd006fdb33db075319ccc933f
DIFF: 
https://github.com/llvm/llvm-project/commit/c1899cd5102dbdacd006fdb33db075319ccc933f.diff

LOG: [X86][AVX] Add PR48908 shuffle test case

(cherry picked from commit da8845fc3d3bb0b0e133f020931440511fa72723)

Added: 


Modified: 
llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 979c365acfd7..3da83b25d363 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -436,6 +436,157 @@ entry:
   unreachable
 }
 
+define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 
x double>* noalias %out0, <4 x double>* noalias %out1, <4 x double>* noalias 
%out2) {
+; X86-AVX1-LABEL: PR48908:
+; X86-AVX1:   # %bb.0:
+; X86-AVX1-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-AVX1-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX1-NEXT:movl {{[0-9]+}}(%esp), %edx
+; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
+; X86-AVX1-NEXT:vinsertf128 $1, %xmm2, %ymm1, %ymm4
+; X86-AVX1-NEXT:vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
+; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
+; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
+; X86-AVX1-NEXT:vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
+; X86-AVX1-NEXT:vmovapd %ymm4, (%edx)
+; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
+; X86-AVX1-NEXT:vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
+; X86-AVX1-NEXT:vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X86-AVX1-NEXT:vmovapd %ymm3, (%ecx)
+; X86-AVX1-NEXT:vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X86-AVX1-NEXT:vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X86-AVX1-NEXT:vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X86-AVX1-NEXT:vmovapd %ymm0, (%eax)
+; X86-AVX1-NEXT:vzeroupper
+; X86-AVX1-NEXT:retl
+;
+; X86-AVX2-LABEL: PR48908:
+; X86-AVX2:   # %bb.0:
+; X86-AVX2-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX2-NEXT:movl {{[0-9]+}}(%esp), %edx
+; X86-AVX2-NEXT:vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X86-AVX2-NEXT:vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X86-AVX2-NEXT:vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
+; X86-AVX2-NEXT:vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1]
+; X86-AVX2-NEXT:vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1]
+; X86-AVX2-NEXT:vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3]
+; X86-AVX2-NEXT:vmovapd %ymm3, (%edx)
+; X86-AVX2-NEXT:vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3]
+; X86-AVX2-NEXT:vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0]
+; X86-AVX2-NEXT:vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X86-AVX2-NEXT:vmovapd %ymm3, (%ecx)
+; X86-AVX2-NEXT:vextractf128 $1, %ymm0, %xmm0
+; X86-AVX2-NEXT:vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X86-AVX2-NEXT:vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X86-AVX2-NEXT:vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X86-AVX2-NEXT:vmovapd %ymm0, (%eax)
+; X86-AVX2-NEXT:vzeroupper
+; X86-AVX2-NEXT:retl
+;
+; X86-AVX512-LABEL: PR48908:
+; X86-AVX512:   # %bb.0:
+; X86-AVX512-NEXT:# kill: def $ymm2 killed $ymm2 def $zmm2
+; X86-AVX512-NEXT:# kill: def $ymm1 killed $ymm1 def $zmm1
+; X86-AVX512-NEXT:# kill: def $ymm0 killed $ymm0 def $zmm0
+; X86-AVX512-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512-NEXT:movl {{[0-9]+}}(%esp), %edx
+; X86-AVX512-NEXT:vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X86-AVX512-NEXT:vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2]
+; X86-AVX512-NEXT:vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X86-AVX512-NEXT:vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3]
+; X86-AVX512-NEXT:vmovapd {{.*#+}} ymm5 = [0,0,3,0,8,0,1,0]
+; X86-AVX512-NEXT:vpermt2pd %zmm2, %zmm5, %zmm3
+; X86-AVX512-NEXT:vmovapd %ymm3, (%edx)
+; X86-AVX512-NEXT:vmovapd {{.*#+}} ymm3 = [0,0,3,0,10,0,1,0]
+; X86-AVX512-NEXT:vpermt2pd %zmm0, %zmm3, %zmm4
+; X86-AVX512-NEXT:vmovapd %ymm4, (%ecx)
+; X86-AVX512-NEXT:vmovapd {{.*#+}} ymm3 = <3,0,11,0,u,u,u,u>
+; X86-AVX512-NEXT:vpermi2pd %zmm1, %zmm0, %zmm3
+; X86-AVX512-NEXT:vmovapd {{.*#+}} ymm0 = [2,0,8,0,9,0,3,0]
+; X86-AVX512-NEXT:vpermi2pd %zmm3, %zmm2, %zmm0
+; X86-AVX512-NEXT:vmovapd %ymm0, (%eax)
+; X86-AVX512-NEXT:vzeroupper
+; 

[llvm-branch-commits] [clang] 162642b - Revert "[ConstantFold] Fold more operations to poison"

2021-02-03 Thread Juneyoung Lee via llvm-branch-commits

Author: Juneyoung Lee
Date: 2021-02-04T01:22:55+09:00
New Revision: 162642bec0df760b27e66cfff046b40f1dfd2713

URL: 
https://github.com/llvm/llvm-project/commit/162642bec0df760b27e66cfff046b40f1dfd2713
DIFF: 
https://github.com/llvm/llvm-project/commit/162642bec0df760b27e66cfff046b40f1dfd2713.diff

LOG: Revert "[ConstantFold] Fold more operations to poison"

This reverts commit 53040a968dc2ff20931661e55f05da2ef8b964a0 due to its
bad interaction with select i1 -> and/or i1 transformation.

This fixes:
https://bugs.llvm.org/show_bug.cgi?id=49005
https://bugs.llvm.org/show_bug.cgi?id=48435

(cherry picked from commit 06829034ca64b8c83a5b20d8abe5ddbfe7af0004)

Added: 


Modified: 
clang/test/Frontend/fixed_point_unary.c
llvm/lib/IR/ConstantFold.cpp
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
llvm/test/Transforms/InstCombine/apint-shift.ll
llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
llvm/test/Transforms/InstCombine/icmp.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll

llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
llvm/test/Transforms/InstCombine/select-of-bittest.ll
llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll
llvm/test/Transforms/InstCombine/shift-add.ll
llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll
llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
llvm/test/Transforms/InstSimplify/ConstProp/poison.ll
llvm/test/Transforms/InstSimplify/ConstProp/shift.ll

llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll
llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll
llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
llvm/test/Transforms/InstSimplify/div.ll
llvm/test/Transforms/InstSimplify/rem.ll
llvm/test/Transforms/InstSimplify/undef.ll
llvm/test/Transforms/SROA/phi-gep.ll
llvm/test/Transforms/SROA/select-gep.ll
llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
llvm/unittests/IR/ConstantsTest.cpp

Removed: 




diff  --git a/clang/test/Frontend/fixed_point_unary.c 
b/clang/test/Frontend/fixed_point_unary.c
index 6ce760daba11..849e38a94bc4 100644
--- a/clang/test/Frontend/fixed_point_unary.c
+++ b/clang/test/Frontend/fixed_point_unary.c
@@ -90,7 +90,7 @@ void inc_usa() {
 // SIGNED-LABEL: @inc_uf(
 // SIGNED-NEXT:  entry:
 // SIGNED-NEXT:[[TMP0:%.*]] = load i16, i16* @uf, align 2
-// SIGNED-NEXT:[[TMP1:%.*]] = add i16 [[TMP0]], poison
+// SIGNED-NEXT:[[TMP1:%.*]] = add i16 [[TMP0]], undef
 // SIGNED-NEXT:store i16 [[TMP1]], i16* @uf, align 2
 // SIGNED-NEXT:ret void
 //
@@ -271,7 +271,7 @@ void dec_usa() {
 // SIGNED-LABEL: @dec_uf(
 // SIGNED-NEXT:  entry:
 // SIGNED-NEXT:[[TMP0:%.*]] = load i16, i16* @uf, align 2
-// SIGNED-NEXT:[[TMP1:%.*]] = sub i16 [[TMP0]], poison
+// SIGNED-NEXT:[[TMP1:%.*]] = sub i16 [[TMP0]], undef
 // SIGNED-NEXT:store i16 [[TMP1]], i16* @uf, align 2
 // SIGNED-NEXT:ret void
 //

diff  --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 03cb108cc485..95dd55237e5f 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -630,7 +630,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, 
Constant *V,
   V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored)) {
 // Undefined behavior invoked - the destination type can't represent
 // the input constant.
-return PoisonValue::get(DestTy);
+return