[llvm-branch-commits] [clang] [HLSL] Add helpers to simplify HLSL resource type declarations. NFC (PR #73967)

2023-11-30 Thread David Peixotto via llvm-branch-commits

https://github.com/dmpots approved this pull request.


https://github.com/llvm/llvm-project/pull/73967
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] Add helpers to simplify HLSL resource type declarations. NFC (PR #73967)

2023-11-30 Thread Xiang Li via llvm-branch-commits

https://github.com/python3kgae approved this pull request.


https://github.com/llvm/llvm-project/pull/73967
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] Add helpers to simplify HLSL resource type declarations. NFC (PR #73967)

2023-11-30 Thread Chris B via llvm-branch-commits

https://github.com/llvm-beanz approved this pull request.


https://github.com/llvm/llvm-project/pull/73967
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] Add helpers to simplify HLSL resource type declarations. NFC (PR #73967)

2023-11-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Justin Bogner (bogner)


Changes

A few changes to HLSLExternalSemaSource and its BuiltinTypeDeclBuilder
to make defining buffer types less verbose. This will make it a lot
easier to see what the differences between the various buffer types
are once we start introducing more of them.


---
Full diff: https://github.com/llvm/llvm-project/pull/73967.diff


2 Files Affected:

- (modified) clang/include/clang/Sema/HLSLExternalSemaSource.h (+2-2) 
- (modified) clang/lib/Sema/HLSLExternalSemaSource.cpp (+34-20) 


```diff
diff --git a/clang/include/clang/Sema/HLSLExternalSemaSource.h 
b/clang/include/clang/Sema/HLSLExternalSemaSource.h
index 4b6bc96f72e225c..c0bfff327139f8c 100644
--- a/clang/include/clang/Sema/HLSLExternalSemaSource.h
+++ b/clang/include/clang/Sema/HLSLExternalSemaSource.h
@@ -30,9 +30,9 @@ class HLSLExternalSemaSource : public ExternalSemaSource {
 
   void defineHLSLVectorAlias();
   void defineTrivialHLSLTypes();
-  void forwardDeclareHLSLTypes();
+  void defineHLSLTypesWithForwardDeclarations();
 
-  void completeBufferType(CXXRecordDecl *Record);
+  void onCompletion(CXXRecordDecl *Record, CompletionFunction Fn);
 
 public:
   ~HLSLExternalSemaSource() override;
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp 
b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index 5057bc6629f046a..8ed6480a9f5c9c8 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -306,6 +306,7 @@ struct BuiltinTypeDeclBuilder {
   }
 
   TemplateParameterListBuilder addTemplateArgumentList();
+  BuiltinTypeDeclBuilder &addSimpleTemplateParams(ArrayRef Names);
 };
 
 struct TemplateParameterListBuilder {
@@ -360,11 +361,19 @@ struct TemplateParameterListBuilder {
 return Builder;
   }
 };
+} // namespace
 
 TemplateParameterListBuilder BuiltinTypeDeclBuilder::addTemplateArgumentList() 
{
   return TemplateParameterListBuilder(*this);
 }
-} // namespace
+
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addSimpleTemplateParams(ArrayRef Names) {
+  TemplateParameterListBuilder Builder = this->addTemplateArgumentList();
+  for (StringRef Name : Names)
+Builder.addTypeParameter(Name);
+  return Builder.finalizeTemplateArgs();
+}
 
 HLSLExternalSemaSource::~HLSLExternalSemaSource() {}
 
@@ -390,7 +399,7 @@ void HLSLExternalSemaSource::InitializeSema(Sema &S) {
   // Force external decls in the HLSL namespace to load from the PCH.
   (void)HLSLNamespace->getCanonicalDecl()->decls_begin();
   defineTrivialHLSLTypes();
-  forwardDeclareHLSLTypes();
+  defineHLSLTypesWithForwardDeclarations();
 
   // This adds a `using namespace hlsl` directive. In DXC, we don't put HLSL's
   // built in types inside a namespace, but we are planning to change that in
@@ -467,18 +476,32 @@ void HLSLExternalSemaSource::defineTrivialHLSLTypes() {
  .Record;
 }
 
-void HLSLExternalSemaSource::forwardDeclareHLSLTypes() {
+/// Set up common members and attributes for buffer types
+static BuiltinTypeDeclBuilder setupBufferType(CXXRecordDecl *Decl, Sema &S,
+  ResourceClass RC,
+  ResourceKind RK) {
+  return BuiltinTypeDeclBuilder(Decl)
+  .addHandleMember()
+  .addDefaultHandleConstructor(S, RC)
+  .annotateResourceClass(RC, RK);
+}
+
+void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
   CXXRecordDecl *Decl;
   Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "RWBuffer")
- .addTemplateArgumentList()
- .addTypeParameter("element_type")
- .finalizeTemplateArgs()
+ .addSimpleTemplateParams({"element_type"})
  .Record;
-  if (!Decl->isCompleteDefinition())
-Completions.insert(
-std::make_pair(Decl->getCanonicalDecl(),
-   std::bind(&HLSLExternalSemaSource::completeBufferType,
- this, std::placeholders::_1)));
+  onCompletion(Decl, [this](CXXRecordDecl *Decl) {
+setupBufferType(Decl, *SemaPtr, ResourceClass::UAV,
+ResourceKind::TypedBuffer)
+.addArraySubscriptOperators()
+.completeDefinition();
+  });
+}
+
+void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record,
+  CompletionFunction Fn) {
+  Completions.insert(std::make_pair(Record->getCanonicalDecl(), Fn));
 }
 
 void HLSLExternalSemaSource::CompleteType(TagDecl *Tag) {
@@ -496,12 +519,3 @@ void HLSLExternalSemaSource::CompleteType(TagDecl *Tag) {
 return;
   It->second(Record);
 }
-
-void HLSLExternalSemaSource::completeBufferType(CXXRecordDecl *Record) {
-  BuiltinTypeDeclBuilder(Record)
-  .addHandleMember()
-  .addDefaultHandleConstructor(*SemaPtr, ResourceClass::UAV)
-  .addArraySubscriptOperators()
-  .annotateResourceClass(ResourceClass::UAV, ResourceKind::TypedBuffer)
-  .completeDefinition();

[llvm-branch-commits] [clang] [HLSL] Add helpers to simplify HLSL resource type declarations. NFC (PR #73967)

2023-11-30 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner created 
https://github.com/llvm/llvm-project/pull/73967

A few changes to HLSLExternalSemaSource and its BuiltinTypeDeclBuilder
to make defining buffer types less verbose. This will make it a lot
easier to see what the differences between the various buffer types
are once we start introducing more of them.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 9ce6298 - [𝘀𝗽𝗿] initial version

2023-11-30 Thread Justin Bogner via llvm-branch-commits

Author: Justin Bogner
Date: 2023-11-30T10:30:26-08:00
New Revision: 9ce6298b63e3bdd3e3468b30ef5c55743ff1f776

URL: 
https://github.com/llvm/llvm-project/commit/9ce6298b63e3bdd3e3468b30ef5c55743ff1f776
DIFF: 
https://github.com/llvm/llvm-project/commit/9ce6298b63e3bdd3e3468b30ef5c55743ff1f776.diff

LOG: [𝘀𝗽𝗿] initial version

Created using spr 1.3.5

Added: 


Modified: 
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/include/clang/Sema/HLSLExternalSemaSource.h
clang/include/clang/Sema/Sema.h
clang/lib/Sema/HLSLExternalSemaSource.cpp
clang/lib/Sema/SemaDecl.cpp
clang/lib/Sema/SemaDeclCXX.cpp
clang/lib/Sema/SemaInit.cpp
clang/lib/Sema/SemaLambda.cpp
clang/lib/Sema/SemaTemplate.cpp
clang/lib/Sema/SemaType.cpp
clang/test/AST/HLSL/RWBuffer-AST.hlsl
clang/test/AST/HLSL/pch.hlsl
clang/test/AST/HLSL/pch_with_buf.hlsl
clang/test/AST/HLSL/resource_binding_attr.hlsl
clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl
clang/test/SemaHLSL/BuiltIns/vector-errors.hlsl

Removed: 




diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 6dfb2d7195203a3..e0ac3af7b98e360 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -4933,6 +4933,8 @@ def err_template_param_shadow : Error<
 def ext_template_param_shadow : ExtWarn<
   err_template_param_shadow.Summary>, InGroup;
 def note_template_param_here : Note<"template parameter is declared here">;
+def note_template_param_external : Note<
+  "template parameter from hidden source: %0">;
 def warn_template_export_unsupported : Warning<
   "exported templates are unsupported">;
 def err_template_outside_namespace_or_class_scope : Error<
@@ -5061,6 +5063,8 @@ def err_template_arg_list_different_arity : Error<
   "%select{class template|function template|variable template|alias template|"
   "template template parameter|concept|template}1 %2">;
 def note_template_decl_here : Note<"template is declared here">;
+def note_template_decl_external : Note<
+  "template declaration from hidden source: %0">;
 def err_template_arg_must_be_type : Error<
   "template argument for template type parameter must be a type">;
 def err_template_arg_must_be_type_suggest : Error<

diff  --git a/clang/include/clang/Sema/HLSLExternalSemaSource.h 
b/clang/include/clang/Sema/HLSLExternalSemaSource.h
index 4b6bc96f72e225c..c0bfff327139f8c 100644
--- a/clang/include/clang/Sema/HLSLExternalSemaSource.h
+++ b/clang/include/clang/Sema/HLSLExternalSemaSource.h
@@ -30,9 +30,9 @@ class HLSLExternalSemaSource : public ExternalSemaSource {
 
   void defineHLSLVectorAlias();
   void defineTrivialHLSLTypes();
-  void forwardDeclareHLSLTypes();
+  void defineHLSLTypesWithForwardDeclarations();
 
-  void completeBufferType(CXXRecordDecl *Record);
+  void onCompletion(CXXRecordDecl *Record, CompletionFunction Fn);
 
 public:
   ~HLSLExternalSemaSource() override;

diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 6de1a098e067a38..a4912e3625c389f 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -8557,6 +8557,10 @@ class Sema final {
  TemplateParameterList *Params,
  TemplateArgumentLoc &Arg);
 
+  void NoteTemplateLocation(const NamedDecl &Decl,
+std::optional ParamRange = {});
+  void NoteTemplateParameterLocation(const NamedDecl &Decl);
+
   ExprResult
   BuildExpressionFromDeclTemplateArgument(const TemplateArgument &Arg,
   QualType ParamType,

diff  --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp 
b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index 8de144486c91df6..8ed6480a9f5c9c8 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -306,6 +306,7 @@ struct BuiltinTypeDeclBuilder {
   }
 
   TemplateParameterListBuilder addTemplateArgumentList();
+  BuiltinTypeDeclBuilder &addSimpleTemplateParams(ArrayRef Names);
 };
 
 struct TemplateParameterListBuilder {
@@ -360,11 +361,19 @@ struct TemplateParameterListBuilder {
 return Builder;
   }
 };
+} // namespace
 
 TemplateParameterListBuilder BuiltinTypeDeclBuilder::addTemplateArgumentList() 
{
   return TemplateParameterListBuilder(*this);
 }
-} // namespace
+
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addSimpleTemplateParams(ArrayRef Names) {
+  TemplateParameterListBuilder Builder = this->addTemplateArgumentList();
+  for (StringRef Name : Names)
+Builder.addTypeParameter(Name);
+  return Builder.finalizeTemplateArgs();
+}
 
 HLSLExternalSemaSource::~HLSLExternalSemaSource() {}
 
@@ -390,7 +399,7 @@ void HLSLExternalSemaSource::InitializeSema(Sema &S) {
   // Force external decls in the HLSL namespace to loa

[llvm-branch-commits] [llvm] ae3cd2d - [X86][NFC] Clang-format X86FoldTablesUtils.h

2023-11-30 Thread Shengchen Kan via llvm-branch-commits

Author: Shengchen Kan
Date: 2023-11-30T17:16:40+08:00
New Revision: ae3cd2d04a88937881ac1f9c5d34f0e0cf96d5bc

URL: 
https://github.com/llvm/llvm-project/commit/ae3cd2d04a88937881ac1f9c5d34f0e0cf96d5bc
DIFF: 
https://github.com/llvm/llvm-project/commit/ae3cd2d04a88937881ac1f9c5d34f0e0cf96d5bc.diff

LOG: [X86][NFC] Clang-format X86FoldTablesUtils.h

Added: 


Modified: 
llvm/include/llvm/Support/X86FoldTablesUtils.h

Removed: 




diff  --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h 
b/llvm/include/llvm/Support/X86FoldTablesUtils.h
index bddff7068b82690..1cce9cdaf65f8d6 100644
--- a/llvm/include/llvm/Support/X86FoldTablesUtils.h
+++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h
@@ -13,23 +13,23 @@ namespace llvm {
 enum {
   // Select which memory operand is being unfolded.
   // (stored in bits 0 - 2)
-  TB_INDEX_0= 0,
-  TB_INDEX_1= 1,
-  TB_INDEX_2= 2,
-  TB_INDEX_3= 3,
-  TB_INDEX_4= 4,
+  TB_INDEX_0 = 0,
+  TB_INDEX_1 = 1,
+  TB_INDEX_2 = 2,
+  TB_INDEX_3 = 3,
+  TB_INDEX_4 = 4,
   TB_INDEX_MASK = 0x7,
 
   // Do not insert the reverse map (MemOp -> RegOp) into the table.
   // This may be needed because there is a many -> one mapping.
-  TB_NO_REVERSE   = 1 << 3,
+  TB_NO_REVERSE = 1 << 3,
 
   // Do not insert the forward map (RegOp -> MemOp) into the table.
   // This is needed for Native Client, which prohibits branch
   // instructions from using a memory operand.
-  TB_NO_FORWARD   = 1 << 4,
+  TB_NO_FORWARD = 1 << 4,
 
-  TB_FOLDED_LOAD  = 1 << 5,
+  TB_FOLDED_LOAD = 1 << 5,
   TB_FOLDED_STORE = 1 << 6,
   TB_FOLDED_BCAST = 1 << 7,
 
@@ -37,19 +37,19 @@ enum {
   // Used for RegOp->MemOp conversion. Encoded as Log2(Align)
   // (stored in bits 9 - 11)
   TB_ALIGN_SHIFT = 8,
-  TB_ALIGN_1 =   0 << TB_ALIGN_SHIFT,
-  TB_ALIGN_16=   4 << TB_ALIGN_SHIFT,
-  TB_ALIGN_32=   5 << TB_ALIGN_SHIFT,
-  TB_ALIGN_64=   6 << TB_ALIGN_SHIFT,
-  TB_ALIGN_MASK  = 0x7 << TB_ALIGN_SHIFT,
+  TB_ALIGN_1 = 0 << TB_ALIGN_SHIFT,
+  TB_ALIGN_16 = 4 << TB_ALIGN_SHIFT,
+  TB_ALIGN_32 = 5 << TB_ALIGN_SHIFT,
+  TB_ALIGN_64 = 6 << TB_ALIGN_SHIFT,
+  TB_ALIGN_MASK = 0x7 << TB_ALIGN_SHIFT,
 
   // Broadcast type.
   // (stored in bits 12 - 13)
   TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3,
-  TB_BCAST_D=   0 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_Q=   1 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_SS   =   2 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_SD   =   3 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT,
+  TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT,
   TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT,
 
   // Unused bits 14-15



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 6d2dfd3 - [LPM] Set gen_crash_diag=false for non-MSSA pass in MSSA pipeline

2023-11-30 Thread Nikita Popov via llvm-branch-commits

Author: Nikita Popov
Date: 2023-11-30T10:21:35+01:00
New Revision: 6d2dfd37bd50b21ed90427052198bd1f06c761f8

URL: 
https://github.com/llvm/llvm-project/commit/6d2dfd37bd50b21ed90427052198bd1f06c761f8
DIFF: 
https://github.com/llvm/llvm-project/commit/6d2dfd37bd50b21ed90427052198bd1f06c761f8.diff

LOG: [LPM] Set gen_crash_diag=false for non-MSSA pass in MSSA pipeline

When a loop pass that does not preserve MSSA is run as part of a
loop-mssa pipeline, this is user error and we should not ask for
a bug report.

Fixes https://github.com/llvm/llvm-project/issues/73554.

Added: 


Modified: 
llvm/lib/Transforms/Scalar/LoopPassManager.cpp
llvm/test/Other/loop-mssa-not-preserved.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp 
b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 4bf192b0600766d..a4f2dbf9a582899 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -313,7 +313,8 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function 
&F,
 
 if (LAR.MSSA && !PassPA.getChecker().preserved())
   report_fatal_error("Loop pass manager using MemorySSA contains a pass "
- "that does not preserve MemorySSA");
+ "that does not preserve MemorySSA",
+ /*gen_crash_diag*/ false);
 
 #ifndef NDEBUG
 // LoopAnalysisResults should always be valid.

diff  --git a/llvm/test/Other/loop-mssa-not-preserved.ll 
b/llvm/test/Other/loop-mssa-not-preserved.ll
index 38390262f113aec..d33e7a83326f492 100644
--- a/llvm/test/Other/loop-mssa-not-preserved.ll
+++ b/llvm/test/Other/loop-mssa-not-preserved.ll
@@ -1,4 +1,4 @@
-; RUN: not --crash opt -passes='loop-mssa(loop-unroll-full)' 2>&1 < %s | 
FileCheck %s
+; RUN: not opt -passes='loop-mssa(loop-unroll-full)' 2>&1 < %s | FileCheck %s
 
 ; CHECK: LLVM ERROR: Loop pass manager using MemorySSA contains a pass that 
does not preserve MemorySSA
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 14ca8d4 - [clang] Fix a bug with qualified name lookup into current instantiation (#73018)

2023-11-30 Thread via llvm-branch-commits

Author: Mariya Podchishchaeva
Date: 2023-11-30T10:46:35+01:00
New Revision: 14ca8d44d0f44ea5125b3c41b66276c902929a54

URL: 
https://github.com/llvm/llvm-project/commit/14ca8d44d0f44ea5125b3c41b66276c902929a54
DIFF: 
https://github.com/llvm/llvm-project/commit/14ca8d44d0f44ea5125b3c41b66276c902929a54.diff

LOG: [clang] Fix a bug with qualified name lookup into current instantiation 
(#73018)

Due to d0d2ee0e4bbe915d649e983c12d37bcfcf58823c, clang doesn't perform
qualified name lookup into the current instantiation when it has
dependent bases. Because of that, the `getTypeName` call always returns
null in the unknown-specialization case. When there is a `typename`
keyword, a `DependentNameType` is constructed instead of simply returning
null. This change attempts to do the same when `typename` is absent.

Fixes https://github.com/llvm/llvm-project/issues/13826

Added: 


Modified: 
clang/docs/ReleaseNotes.rst
clang/lib/Sema/SemaDecl.cpp
clang/test/SemaTemplate/dependent-base-classes.cpp

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7d64647433d92a7..748e2db2f850744 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -645,6 +645,9 @@ Bug Fixes in This Version
 - Fix crash when the object used as a ``static_assert`` message has ``size`` 
or ``data`` members
   which are not member functions.
 - Support UDLs in ``static_assert`` message.
+- Fixed false positive error emitted by clang when performing qualified name
+  lookup and the current class instantiation has dependent bases.
+  Fixes (`#13826 <https://github.com/llvm/llvm-project/issues/13826>`_)
 
 Bug Fixes to Compiler Builtins
 ^^

diff  --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 9591f8b87ba5456..67d0997b32e157a 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -442,7 +442,6 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, 
SourceLocation NameLoc,
   UsingShadowDecl *FoundUsingShadow = nullptr;
   switch (Result.getResultKind()) {
   case LookupResult::NotFound:
-  case LookupResult::NotFoundInCurrentInstantiation:
 if (CorrectedII) {
   TypeNameValidatorCCC CCC(/*AllowInvalid=*/true, isClassName,
AllowDeducedTemplate);
@@ -482,7 +481,18 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, 
SourceLocation NameLoc,
 }
   }
 }
-// If typo correction failed or was not performed, fall through
+Result.suppressDiagnostics();
+return nullptr;
+  case LookupResult::NotFoundInCurrentInstantiation:
+if (AllowImplicitTypename == ImplicitTypenameContext::Yes) {
+  QualType T = Context.getDependentNameType(ElaboratedTypeKeyword::None,
+SS->getScopeRep(), &II);
+  TypeLocBuilder TLB;
+  DependentNameTypeLoc TL = TLB.push(T);
+  TL.setQualifierLoc(SS->getWithLocInContext(Context));
+  TL.setNameLoc(NameLoc);
+  return CreateParsedType(T, TLB.getTypeSourceInfo(Context, T));
+}
 [[fallthrough]];
   case LookupResult::FoundOverloaded:
   case LookupResult::FoundUnresolvedValue:

diff  --git a/clang/test/SemaTemplate/dependent-base-classes.cpp 
b/clang/test/SemaTemplate/dependent-base-classes.cpp
index 09f475f8bde9183..92a37efaa7e73f6 100644
--- a/clang/test/SemaTemplate/dependent-base-classes.cpp
+++ b/clang/test/SemaTemplate/dependent-base-classes.cpp
@@ -130,3 +130,17 @@ namespace PR5812 {
 
   Derived di;
 }
+
+namespace GH13826 {
+template  struct A {
+  typedef int type;
+  struct B;
+};
+
+template  struct A::B : A {
+  B::type t;
+};
+
+A a;
+A::B b;
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] abc60e9 - [X86] vec_fabs.ll - add SSE test coverage

2023-11-30 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2023-11-30T10:07:00Z
New Revision: abc60e9808820c3f6614e6815909d43ed085460e

URL: 
https://github.com/llvm/llvm-project/commit/abc60e9808820c3f6614e6815909d43ed085460e
DIFF: 
https://github.com/llvm/llvm-project/commit/abc60e9808820c3f6614e6815909d43ed085460e.diff

LOG: [X86] vec_fabs.ll - add SSE test coverage

Added: 


Modified: 
llvm/test/CodeGen/X86/vec_fabs.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/vec_fabs.ll 
b/llvm/test/CodeGen/X86/vec_fabs.ll
index ec02dfda30c8502..c17341c2c8b077e 100644
--- a/llvm/test/CodeGen/X86/vec_fabs.ll
+++ b/llvm/test/CodeGen/X86/vec_fabs.ll
@@ -1,24 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=X86,X86-AVX,X86-AVX1
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefixes=X86,X86-AVX,X86-AVX2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s 
--check-prefixes=X86,X86-AVX512,X86-AVX512VL
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck 
%s --check-prefixes=X86,X86-AVX512,X86-AVX512FP16
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | 
FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512VLDQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=X64,X64-AVX,X64-AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefixes=X64,X64-AVX,X64-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck 
%s --check-prefixes=X64,X64-AVX512,X64-AVX512VL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck 
%s --check-prefixes=X64,X64-AVX512,X64-AVX512FP16
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | 
FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512VLDQ
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s 
--check-prefixes=X86,X86-SSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX1
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX2
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s 
--check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VL
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck 
%s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512FP16
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | 
FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VLDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s 
--check-prefixes=X64,X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck 
%s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck 
%s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512FP16
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | 
FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VLDQ
 
 ;
 ; 128-bit Vectors
 ;
 
-define <2 x double> @fabs_v2f64(<2 x double> %p) {
-; X86-AVX-LABEL: fabs_v2f64:
-; X86-AVX:   # %bb.0:
-; X86-AVX-NEXT:vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-AVX-NEXT:retl
+define <2 x double> @fabs_v2f64(<2 x double> %p) nounwind {
+; X86-SSE-LABEL: fabs_v2f64:
+; X86-SSE:   # %bb.0:
+; X86-SSE-NEXT:andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT:retl
+;
+; X86-AVX1OR2-LABEL: fabs_v2f64:
+; X86-AVX1OR2:   # %bb.0:
+; X86-AVX1OR2-NEXT:vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-AVX1OR2-NEXT:retl
 ;
 ; X86-AVX512VL-LABEL: fabs_v2f64:
 ; X86-AVX512VL:   # %bb.0:
@@ -35,10 +42,15 @@ define <2 x double> @fabs_v2f64(<2 x double> %p) {
 ; X86-AVX512VLDQ-NEXT:vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
 ; X86-AVX512VLDQ-NEXT:retl
 ;
-; X64-AVX-LABEL: fabs_v2f64:
-; X64-AVX:   # %bb.0:
-; X64-AVX-NEXT:vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT:retq
+; X64-SSE-LABEL: fabs_v2f64:
+; X64-SSE:   # %bb.0:
+; X64-SSE-NEXT:andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE-NEXT:retq
+;
+; X64-AVX1OR2-LABEL: fabs_v2f64:
+; X64-AVX1OR2:   # %bb.0:
+; X64-AVX1OR2-NEXT:vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1OR2-NEXT:retq
 ;
 ; X64-AVX512VL-LABEL: fabs_v2f64:
 ; X64-AVX512VL:   

[llvm-branch-commits] [mlir] ae6eedd - [mlir] Fix two `CHECK:` typos (#73803)

2023-11-30 Thread via llvm-branch-commits

Author: Rik Huijzer
Date: 2023-11-30T10:19:27+01:00
New Revision: ae6eedd27556c32232f4e8be1292ffa0e1f16d90

URL: 
https://github.com/llvm/llvm-project/commit/ae6eedd27556c32232f4e8be1292ffa0e1f16d90
DIFF: 
https://github.com/llvm/llvm-project/commit/ae6eedd27556c32232f4e8be1292ffa0e1f16d90.diff

LOG: [mlir] Fix two `CHECK:` typos (#73803)

Out of curiosity, I ran [typos](https://github.com/crate-ci/typos)
against MLIR. It found two `CHECK:` typos (and many minor typos; which
I'm not gonna work on today).

Added: 


Modified: 
mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir
mlir/test/Dialect/NVGPU/transform-pipeline-shared.mlir

Removed: 




diff  --git a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir 
b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir
index 8acf358c887a987..55282e8334abd72 100644
--- a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir
+++ b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir
@@ -6,7 +6,7 @@
 //  CHECK-SAME: %[[a:.*]]: index, %[[b:.*]]: index
 //   CHECK:   %[[apply:.*]] = affine.apply #[[$map]]()[%[[a]], %[[b]]]
 //   CHECK:   %[[apply:.*]] = affine.apply #[[$map]]()[%[[a]], %[[b]]]
-//   CHECL:   return %[[apply]]
+//   CHECK:   return %[[apply]]
 func.func @affine_apply(%a: index, %b: index) -> index {
   %0 = affine.apply affine_map<()[s0, s1] -> (s0 + s1)>()[%a, %b]
   %1 = "test.reify_bound"(%0) : (index) -> (index)

diff  --git a/mlir/test/Dialect/NVGPU/transform-pipeline-shared.mlir 
b/mlir/test/Dialect/NVGPU/transform-pipeline-shared.mlir
index 02aca49052ad173..42b072374261e0a 100644
--- a/mlir/test/Dialect/NVGPU/transform-pipeline-shared.mlir
+++ b/mlir/test/Dialect/NVGPU/transform-pipeline-shared.mlir
@@ -165,7 +165,7 @@ func.func @async_depth_2_peeled(%global: memref) {
   // CHECK:   nvgpu.device_async_copy
   // CHECK:   scf.yield
   // CHECK: nvgpu.device_async_wait %{{.*}} {numGroups = 1
-  // CHEKC: nvgpu.device_async_wait %{{.*}} {numGroups = 0
+  // CHECK: nvgpu.device_async_wait %{{.*}} {numGroups = 0
   scf.for %i = %c0 to %c98 step %c4 {
 %c96 = arith.constant 96 : index
 %cond = arith.cmpi slt, %i, %c96 : index



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] eb64697 - [X86][Codegen] Correct the domain of VP2INTERSECT

2023-11-30 Thread Shengchen Kan via llvm-branch-commits

Author: Shengchen Kan
Date: 2023-11-30T17:56:21+08:00
New Revision: eb64697a7b75d2b22041cc992fad0c8dfa7989cb

URL: 
https://github.com/llvm/llvm-project/commit/eb64697a7b75d2b22041cc992fad0c8dfa7989cb
DIFF: 
https://github.com/llvm/llvm-project/commit/eb64697a7b75d2b22041cc992fad0c8dfa7989cb.diff

LOG: [X86][Codegen] Correct the domain of VP2INTERSECT

GenericDomain -> SSEPackedInt

Found by #73654

Added: 


Modified: 
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll
llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll
llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td 
b/llvm/lib/Target/X86/X86InstrAVX512.td
index f325f47d46464c3..0514f0d19506707 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12875,8 +12875,10 @@ multiclass avx512_vp2intersect;
 defm VP2INTERSECTQ : avx512_vp2intersect, 
REX_W;
+}
 
 multiclass avx512_binop_all2 opc, string OpcodeStr,
  X86SchedWriteWidths sched,

diff  --git a/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll 
b/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
index ef07d30299e9dad..9741972767bcdcd 100644
--- a/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
@@ -84,7 +84,7 @@ define void @test_mm256_2intersect_epi32_p(ptr nocapture 
readonly %a, ptr nocapt
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
-; X86-NEXT:vmovaps (%edx), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfc,0x28,0x02]
+; X86-NEXT:vmovdqa (%edx), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0x02]
 ; X86-NEXT:vp2intersectd (%ecx), %ymm0, %k0 # encoding: 
[0x62,0xf2,0x7f,0x28,0x68,0x01]
 ; X86-NEXT:kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
 ; X86-NEXT:kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
@@ -96,7 +96,7 @@ define void @test_mm256_2intersect_epi32_p(ptr nocapture 
readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm256_2intersect_epi32_p:
 ; X64:   # %bb.0: # %entry
-; X64-NEXT:vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfc,0x28,0x07]
+; X64-NEXT:vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0x07]
 ; X64-NEXT:vp2intersectd (%rsi), %ymm0, %k0 # encoding: 
[0x62,0xf2,0x7f,0x28,0x68,0x06]
 ; X64-NEXT:kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
@@ -125,7 +125,7 @@ define void @test_mm256_2intersect_epi64_p(ptr nocapture 
readonly %a, ptr nocapt
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
-; X86-NEXT:vmovaps (%esi), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfc,0x28,0x06]
+; X86-NEXT:vmovdqa (%esi), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0x06]
 ; X86-NEXT:vp2intersectq (%edx), %ymm0, %k0 # encoding: 
[0x62,0xf2,0xff,0x28,0x68,0x02]
 ; X86-NEXT:kshiftlw $12, %k0, %k2 # encoding: 
[0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X86-NEXT:kshiftrw $12, %k2, %k2 # encoding: 
[0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -142,7 +142,7 @@ define void @test_mm256_2intersect_epi64_p(ptr nocapture 
readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm256_2intersect_epi64_p:
 ; X64:   # %bb.0: # %entry
-; X64-NEXT:vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfc,0x28,0x07]
+; X64-NEXT:vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0x07]
 ; X64-NEXT:vp2intersectq (%rsi), %ymm0, %k0 # encoding: 
[0x62,0xf2,0xff,0x28,0x68,0x06]
 ; X64-NEXT:kshiftlw $12, %k0, %k2 # encoding: 
[0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X64-NEXT:kshiftrw $12, %k2, %k2 # encoding: 
[0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -175,7 +175,7 @@ define void @test_mm256_2intersect_epi32_b(ptr nocapture 
readonly %a, ptr nocapt
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
-; X86-NEXT:vbroadcastss (%edx), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe2,0x7d,0x18,0x02]
+; X86-NEXT:vpbroadcastd (%edx), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe2,0x7d,0x58,0x02]
 ; X86-NEXT:vp2intersectd (%ecx){1to8}, %ymm0, %k0 # encoding: 
[0x62,0xf2,0x7f,0x38,0x68,0x01]
 ;

[llvm-branch-commits] [mlir] e9869b5 - [mlir][docgen] Add ops source link (#73657)

2023-11-30 Thread via llvm-branch-commits

Author: Rik Huijzer
Date: 2023-11-30T10:29:50+01:00
New Revision: e9869b57707fc6dd828872a70c2f377cc0061978

URL: 
https://github.com/llvm/llvm-project/commit/e9869b57707fc6dd828872a70c2f377cc0061978
DIFF: 
https://github.com/llvm/llvm-project/commit/e9869b57707fc6dd828872a70c2f377cc0061978.diff

LOG: [mlir][docgen] Add ops source link (#73657)

This patch proposes two changes. Firstly, it adds a source link
above the generated operations docs (above the `emitOpDoc` calls). This
link will point directly to the source TableGen file for the group of
operations. For example, for the current
[`amdgpu`](https://mlir.llvm.org/docs/Dialects/AMDGPU/) page, the patch
will add a source link below the "Operation definition" heading pointing
to
[`mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td`](https://github.com/llvm/llvm-project/blob/main/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td).
The link is wrapped in a "op-definitions-source-link" class which could
allow for custom styling, but it also looks reasonable without custom
styling I think:

![afbeelding](https://github.com/llvm/llvm-project/assets/20724914/7c0e59b9-b14b-4f5d-a671-c87e857a7b03)

Secondly, this patch simplifies the header names such as "Operation
definition" and "Attribute definition" to "Operations" and "Attributes"
respectively. This is in line with manually defined subheadings on pages
such as the one for the
[`vector`](https://mlir.llvm.org/docs/Dialects/Vector/#operations)
dialect.

Added: 


Modified: 
mlir/test/mlir-tblgen/gen-dialect-doc.td
mlir/tools/mlir-tblgen/OpDocGen.cpp

Removed: 




diff  --git a/mlir/test/mlir-tblgen/gen-dialect-doc.td 
b/mlir/test/mlir-tblgen/gen-dialect-doc.td
index 120164345fb2ce3..ca0b6e38edf82c2 100644
--- a/mlir/test/mlir-tblgen/gen-dialect-doc.td
+++ b/mlir/test/mlir-tblgen/gen-dialect-doc.td
@@ -85,7 +85,7 @@ def TestTypeDefParams : TypeDef {
 // CHECK: Interfaces: NoMemoryEffect (MemoryEffectOpInterface)
 // CHECK: Effects: MemoryEffects::Effect{}
 
-// CHECK: ## Attribute constraint definition
+// CHECK: ## Attribute constraints
 // CHECK: ### attribute summary
 // CHECK: attribute description
 
@@ -97,7 +97,7 @@ def TestTypeDefParams : TypeDef {
 // CHECK: Syntax:
 // CHECK: #test.test_attr_def_params
 
-// CHECK: ## Type constraint definition
+// CHECK: ## Type constraints
 // CHECK: ### type summary
 // CHECK: type description
 

diff  --git a/mlir/tools/mlir-tblgen/OpDocGen.cpp 
b/mlir/tools/mlir-tblgen/OpDocGen.cpp
index 773ad6ec198b957..b5b26a70859eced 100644
--- a/mlir/tools/mlir-tblgen/OpDocGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDocGen.cpp
@@ -265,10 +265,22 @@ static void emitOpDoc(const Operator &op, raw_ostream 
&os) {
   os << "\n";
 }
 
+static void emitSourceLink(StringRef inputFilename, raw_ostream &os) {
+  size_t pathBegin = inputFilename.find("mlir/include/mlir/");
+  if (pathBegin == StringRef::npos)
+return;
+
+  StringRef inputFromMlirInclude = inputFilename.substr(pathBegin);
+
+  os << "[source](https://github.com/llvm/llvm-project/blob/main/";
+ << inputFromMlirInclude << ")\n\n";
+}
+
 static void emitOpDoc(const RecordKeeper &recordKeeper, raw_ostream &os) {
   auto opDefs = getRequestedOpDefinitions(recordKeeper);
 
   os << "\n";
+  emitSourceLink(recordKeeper.getInputFilename(), os);
   for (const llvm::Record *opDef : opDefs)
 emitOpDoc(Operator(opDef), os);
 }
@@ -392,12 +404,13 @@ static void maybeNest(bool nest, 
llvm::function_ref fn,
   }
 }
 
-static void emitBlock(ArrayRef attributes,
+static void emitBlock(ArrayRef attributes, StringRef inputFilename,
   ArrayRef attrDefs, ArrayRef ops,
   ArrayRef types, ArrayRef typeDefs,
   raw_ostream &os) {
   if (!ops.empty()) {
-os << "## Operation definition\n\n";
+os << "## Operations\n\n";
+emitSourceLink(inputFilename, os);
 for (const OpDocGroup &grouping : ops) {
   bool nested = !grouping.summary.empty();
   maybeNest(
@@ -417,32 +430,32 @@ static void emitBlock(ArrayRef attributes,
   }
 
   if (!attributes.empty()) {
-os << "## Attribute constraint definition\n\n";
+os << "## Attribute constraints\n\n";
 for (const Attribute &attr : attributes)
   emitAttrDoc(attr, os);
   }
 
   if (!attrDefs.empty()) {
-os << "## Attribute definition\n\n";
+os << "## Attributes\n\n";
 for (const AttrDef &def : attrDefs)
   emitAttrOrTypeDefDoc(def, os);
   }
 
   // TODO: Add link between use and def for types
   if (!types.empty()) {
-os << "## Type constraint definition\n\n";
+os << "## Type constraints\n\n";
 for (const Type &type : types)
   emitTypeDoc(type, os);
   }
 
   if (!typeDefs.empty()) {
-os << "## Type definition\n\n";
+os << "## Types\n\n";
 for (const TypeDef &def : typeDefs)
   emitAttrOrTypeDefDoc(def, os);
   }
 }
 
-static void emitDialectDoc(const Dialec

[llvm-branch-commits] [llvm] b724561 - [LoongArch] Add codegen support for extractelement (#73759)

2023-11-30 Thread via llvm-branch-commits

Author: wanglei
Date: 2023-11-30T17:29:18+08:00
New Revision: b72456120f1db38ed7068fb592fcf768c6d5cce2

URL: 
https://github.com/llvm/llvm-project/commit/b72456120f1db38ed7068fb592fcf768c6d5cce2
DIFF: 
https://github.com/llvm/llvm-project/commit/b72456120f1db38ed7068fb592fcf768c6d5cce2.diff

LOG: [LoongArch] Add codegen support for extractelement (#73759)

Add codegen support for extractelement when the `lsx` or `lasx`
feature is enabled.

Added: 
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll

Modified: 
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Removed: 




diff  --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ac78789c2c331df..f59beca523cbbc6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -247,6 +247,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const 
TargetMachine &TM,
   // will be `Custom` handled in the future.
   setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
+  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
 }
 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
   setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -276,6 +277,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const 
TargetMachine &TM,
   // FIXME: Same as above.
   setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
+  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
 }
 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
   setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);

diff  --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp 
b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index ddd1c9943fac016..6576100d3b32186 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -90,6 +90,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 Opc = LoongArch::FMOV_S;
   } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) {
 Opc = LoongArch::FMOV_D;
+  } else if (LoongArch::GPRRegClass.contains(DstReg) &&
+ LoongArch::FPR32RegClass.contains(SrcReg)) {
+// FPR32 -> GPR copies
+Opc = LoongArch::MOVFR2GR_S;
+  } else if (LoongArch::GPRRegClass.contains(DstReg) &&
+ LoongArch::FPR64RegClass.contains(SrcReg)) {
+// FPR64 -> GPR copies
+Opc = LoongArch::MOVFR2GR_D;
   } else {
 // TODO: support other copies.
 llvm_unreachable("Impossible reg-to-reg copy");

diff  --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index e19aa92266b1f9f..380206ddcf1066a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1401,6 +1401,44 @@ foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] 
in {
   def  : RegRegStPat;
 }
 
+// Vector extraction with constant index.
+def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
+  (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
+def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
+  (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)),
+  (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>;
+def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)),
+  (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>;
+def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)),
+  (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), 
sub_32))>;
+def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)),
+  (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), 
sub_64))>;
+
+// Vector extraction with variable index.
+def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)),
+  (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj,
+i64:$rk),
+ sub_32)),
+GPR), (i64 24))>;
+def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)),
+  (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H 
v16i16:$xj,
+i64:$rk),
+ sub_32)),
+   

[llvm-branch-commits] [mlir] f1fba63 - [mlir][vector][doc] 0D vectors (#73792)

2023-11-30 Thread via llvm-branch-commits

Author: Rik Huijzer
Date: 2023-11-30T10:11:50+01:00
New Revision: f1fba63f1282a21e6adc5f7732e8583c52792744

URL: 
https://github.com/llvm/llvm-project/commit/f1fba63f1282a21e6adc5f7732e8583c52792744
DIFF: 
https://github.com/llvm/llvm-project/commit/f1fba63f1282a21e6adc5f7732e8583c52792744.diff

LOG: [mlir][vector][doc] 0D vectors (#73792)

Currently, the only mention of 0d vectors in the MLIR source code that I
could find was:

> 0D vectors are allowed by omitting the dimension: `vector<f32>`.

in
[`BuiltinTypes.td`](https://github.com/llvm/llvm-project/blob/437a48b2d9bfc6e38a0ca43f1cee48ceaf0fe249/mlir/include/mlir/IR/BuiltinTypes.td#L1046-L1047).

This patch adds a summary of
https://discourse.llvm.org/t/what-is-the-semantics-of-memref-0xf32-and-tensor-0xf32/3557
and https://discourse.llvm.org/t/should-we-have-0-d-vectors/3097/5.

Added: 


Modified: 
mlir/docs/Dialects/Vector.md

Removed: 




diff  --git a/mlir/docs/Dialects/Vector.md b/mlir/docs/Dialects/Vector.md
index a907d59566366bb..6d05d9b90467662 100644
--- a/mlir/docs/Dialects/Vector.md
+++ b/mlir/docs/Dialects/Vector.md
@@ -247,7 +247,19 @@ which conveys higher-D meaning. But it also is one of the 
most overloaded terms
 in compilers and hardware. For now, we generally use the `n-D` `vector` name 
and
 are open to better suggestions.
 
-## DeeperDive
+## 0D Vectors
+
+Vectors of dimension 0 (or _0-D vectors_ or _0D vectors_) are allowed inside
+MLIR. For instance, a `f32` vector containing one scalar can be denoted as
+`vector<f32>`. This is similar to the `tensor<f32>` type that is available in
+TensorFlow or the `memref<f32>` type that is available in MLIR.
+
+Generally, a 0D `vector` can be interpreted as a scalar. The benefit of 0D
+`vector`s, `tensor`s, and `memref`s is that they make it easier to lower code
+from various frontends such as TensorFlow and make it easier to handle corner
+cases such as unrolling a loop from 1D to 0D.
+
+## LLVM Lowering Tradeoffs
 
 This section describes the tradeoffs involved in lowering the MLIR n-D vector
 type and operations on it to LLVM-IR. Putting aside the



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 8a66510 - [AMDGPU] Don't create mulhi_24 in CGP (#72983)

2023-11-30 Thread via llvm-branch-commits

Author: Pierre van Houtryve
Date: 2023-11-30T08:26:45+01:00
New Revision: 8a66510fa73c1507c2a58338e180ddb075993a5a

URL: 
https://github.com/llvm/llvm-project/commit/8a66510fa73c1507c2a58338e180ddb075993a5a
DIFF: 
https://github.com/llvm/llvm-project/commit/8a66510fa73c1507c2a58338e180ddb075993a5a.diff

LOG: [AMDGPU] Don't create mulhi_24 in CGP (#72983)

Instead, create a mul24 with a 64 bit result and let ISel take care of
it.

This allows patterns to simply match mul24 even for 64-bit muls instead of 
having to match both mul/mulhi and a buildvector/bitconvert/etc.

Added: 


Modified: 
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
llvm/test/CodeGen/AMDGPU/mul_int24.ll
llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll

Removed: 




diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f76e88eab8e4a36..06f9c0445bceac0 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1998,12 +1998,14 @@ def int_amdgcn_alignbyte : 
ClangBuiltin<"__builtin_amdgcn_alignbyte">,
   [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_i32_ty],
+// mul24 intrinsics can return i32 or i64.
+// When returning i64, they're lowered to a mul24/mulhi24 pair.
+def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_anyint_ty],
   [llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_i32_ty],
+def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_anyint_ty],
   [llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
 >;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 4cce34bdeabcf44..4caa9cd9225b690 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -624,34 +624,6 @@ static Value *insertValues(IRBuilder<> &Builder,
   return NewVal;
 }
 
-// Returns 24-bit or 48-bit (as per `NumBits` and `Size`) mul of `LHS` and
-// `RHS`. `NumBits` is the number of KnownBits of the result and `Size` is the
-// width of the original destination.
-static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS,
-   unsigned Size, unsigned NumBits, bool IsSigned) {
-  if (Size <= 32 || NumBits <= 32) {
-Intrinsic::ID ID =
-IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
-return Builder.CreateIntrinsic(ID, {}, {LHS, RHS});
-  }
-
-  assert(NumBits <= 48);
-
-  Intrinsic::ID LoID =
-  IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
-  Intrinsic::ID HiID =
-  IsSigned ? Intrinsic::amdgcn_mulhi_i24 : Intrinsic::amdgcn_mulhi_u24;
-
-  Value *Lo = Builder.CreateIntrinsic(LoID, {}, {LHS, RHS});
-  Value *Hi = Builder.CreateIntrinsic(HiID, {}, {LHS, RHS});
-
-  IntegerType *I64Ty = Builder.getInt64Ty();
-  Lo = Builder.CreateZExtOrTrunc(Lo, I64Ty);
-  Hi = Builder.CreateZExtOrTrunc(Hi, I64Ty);
-
-  return Builder.CreateOr(Lo, Builder.CreateShl(Hi, 32));
-}
-
 bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
   if (I.getOpcode() != Instruction::Mul)
 return false;
@@ -691,26 +663,20 @@ bool 
AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
   extractValues(Builder, RHSVals, RHS);
 
   IntegerType *I32Ty = Builder.getInt32Ty();
-  for (int I = 0, E = LHSVals.size(); I != E; ++I) {
-Value *LHS, *RHS;
-if (IsSigned) {
-  LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
-  RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
-} else {
-  LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
-  RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
-}
+  IntegerType *IntrinTy = Size > 32 ? Builder.getInt64Ty() : I32Ty;
+  Type *DstTy = LHSVals[0]->getType();
 
-Value *Result =
-getMul24(Builder, LHS, RHS, Size, LHSBits + RHSBits, IsSigned);
-
-if (IsSigned) {
-  ResultVals.push_back(
-  Builder.CreateSExtOrTrunc(Result, LHSVals[I]->getType()));
-} else {
-  ResultVals.push_back(
-  Builder.CreateZExtOrTrunc(Result, LHSVals[I]->getType()));
-}
+  for (int I = 0, E = LHSVals.size(); I != E; ++I) {
+Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
+  : Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
+Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
+  : Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
+Intrinsic::ID ID =
+IsSigned ? Intrinsic::am

[llvm-branch-commits] [llvm] 511ba45 - [X86][MC][CodeGen] Support EGPR for KMOV (#73781)

2023-11-30 Thread via llvm-branch-commits

Author: Shengchen Kan
Date: 2023-11-30T16:13:51+08:00
New Revision: 511ba45a47d6f9e48ad364181830c9fb974135b2

URL: 
https://github.com/llvm/llvm-project/commit/511ba45a47d6f9e48ad364181830c9fb974135b2
DIFF: 
https://github.com/llvm/llvm-project/commit/511ba45a47d6f9e48ad364181830c9fb974135b2.diff

LOG: [X86][MC][CodeGen] Support EGPR for KMOV (#73781)

KMOV is essential for copying between k-registers and GPRs.
R16-R31 were added to the GPRs in #70958, so we extend KMOV for these new
registers first.

This patch
1.  Promotes KMOV instructions from VEX space to EVEX space
2.  Emits prefix {evex} for the EVEX variants
3. Prefers the EVEX variant over the VEX variant in ISEL and optimizations for
better RA

EVEX variants will be compressed to VEX variants by existing EVEX2VEX
pass if no EGPR is used.

RFC:
https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
TAG: llvm-test-suite && CPU2017 can be built with feature egpr
successfully.

Added: 
llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll
llvm/test/CodeGen/X86/apx/kmov-isel.ll
llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
llvm/test/MC/Disassembler/X86/apx/kmov.txt
llvm/test/MC/X86/apx/kmov-att.s
llvm/test/MC/X86/apx/kmov-intel.s

Modified: 
llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
llvm/lib/Target/X86/X86DomainReassignment.cpp
llvm/lib/Target/X86/X86ExpandPseudo.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/lib/Target/X86/X86InstrInfo.td
llvm/test/TableGen/x86-fold-tables.inc

Removed: 




diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp 
b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index aadbc3845b79c18..cab2f0a2e1c1a2b 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -377,7 +377,8 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, 
raw_ostream &O,
 O << "\t{vex2}";
   else if (Flags & X86::IP_USE_VEX3)
 O << "\t{vex3}";
-  else if (Flags & X86::IP_USE_EVEX)
+  else if ((Flags & X86::IP_USE_EVEX) ||
+   (TSFlags & X86II::ExplicitOpPrefixMask) == 
X86II::ExplicitEVEXPrefix)
 O << "\t{evex}";
 
   if (Flags & X86::IP_USE_DISP8)

diff  --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp 
b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index fa8d5c752a3d273..be7e8db95b98ed8 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -619,16 +619,22 @@ void X86DomainReassignment::initConverters() {
 std::make_unique(From, To);
   };
 
-  createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
-  createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
+  bool HasEGPR = STI->hasEGPR();
+  createReplacerDstCOPY(X86::MOVZX32rm16,
+HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+  createReplacerDstCOPY(X86::MOVZX64rm16,
+HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
 
   createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
   createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
 
   if (STI->hasDQI()) {
-createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
-createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
-createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
+createReplacerDstCOPY(X86::MOVZX16rm8,
+  HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+createReplacerDstCOPY(X86::MOVZX32rm8,
+  HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+createReplacerDstCOPY(X86::MOVZX64rm8,
+  HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
 
 createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
 createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
@@ -639,8 +645,8 @@ void X86DomainReassignment::initConverters() {
 Converters[{MaskDomain, From}] = std::make_unique(From, To);
   };
 
-  createReplacer(X86::MOV16rm, X86::KMOVWkm);
-  createReplacer(X86::MOV16mr, X86::KMOVWmk);
+  createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+  createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
   createReplacer(X86::MOV16rr, X86::KMOVWkk);
   createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
   createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
@@ -650,11 +656,11 @@ void X86DomainReassignment::initConverters() {
   createReplacer(X86::XOR16rr, X86::KXORWrr);
 
   if (STI->hasBWI()) {
-createReplacer(X86::MOV32rm, X86::KMOVDkm);
-createReplacer(X86::MOV64rm, X86::KMOVQkm);
+createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
+createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
 
-createReplacer(X86::MOV32mr, X86::KMOVDmk);
-createReplacer(X86:

[llvm-branch-commits] [clang] 030047c - [Clang] Eagerly instantiate used constexpr function upon definition. (#73463)

2023-11-30 Thread via llvm-branch-commits

Author: cor3ntin
Date: 2023-11-30T08:45:05+01:00
New Revision: 030047c432cac133738be68fa0974f70e69dd58d

URL: 
https://github.com/llvm/llvm-project/commit/030047c432cac133738be68fa0974f70e69dd58d
DIFF: 
https://github.com/llvm/llvm-project/commit/030047c432cac133738be68fa0974f70e69dd58d.diff

LOG: [Clang] Eagerly instantiate used constexpr function upon definition. 
(#73463)

Despite CWG2497 not being resolved, it is reasonable to expect the
following code to compile (it is supported by other compilers):

```cpp
  template <typename T> constexpr T f();
  constexpr int g() { return f<int>(); } // #1
  template <typename T> constexpr T f() { return 123; }
  int k[g()];
  // #2
```

To that end, we eagerly instantiate all referenced specializations of
constexpr functions when they are defined.

We maintain a map of (pattern, [instantiations]) independent of
`PendingInstantiations` to avoid having to iterate that list after each
function definition.

We should apply the same logic to constexpr variables, but I wanted to
keep the PR small.

Fixes #73232

Added: 
clang/test/PCH/instantiate-used-constexpr-function.cpp
clang/test/SemaTemplate/instantiate-used-constexpr-function.cpp

Modified: 
clang/docs/ReleaseNotes.rst
clang/include/clang/Sema/ExternalSemaSource.h
clang/include/clang/Sema/MultiplexExternalSemaSource.h
clang/include/clang/Sema/Sema.h
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTReader.h
clang/lib/Sema/MultiplexExternalSemaSource.cpp
clang/lib/Sema/SemaDecl.cpp
clang/lib/Sema/SemaExpr.cpp
clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTWriter.cpp

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 8d2b60dd75acfee..7d64647433d92a7 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -793,6 +793,11 @@ Bug Fixes to C++ Support
 - Fix crash when parsing nested requirement. Fixes:
   (`#73112 `_)
 
+- Clang now immediately instantiates function template specializations
+  at the end of the definition of the corresponding function template
+  when the definition appears after the first point of instantiation.
+  (`#73232 `_)
+
 Bug Fixes to AST Handling
 ^
 - Fixed an import failure of recursive friend class template.

diff  --git a/clang/include/clang/Sema/ExternalSemaSource.h 
b/clang/include/clang/Sema/ExternalSemaSource.h
index 22d1ee2df115a6e..8b41c5483458a0f 100644
--- a/clang/include/clang/Sema/ExternalSemaSource.h
+++ b/clang/include/clang/Sema/ExternalSemaSource.h
@@ -181,6 +181,9 @@ class ExternalSemaSource : public ExternalASTSource {
  SmallVectorImpl > &Pending) {}
 
+  virtual void ReadPendingInstantiationsOfConstexprEntity(
+  const NamedDecl *D, llvm::SmallSetVector &Decls){};
+
   /// Read the set of late parsed template functions for this source.
   ///
   /// The external source should insert its own late parsed template functions

diff  --git a/clang/include/clang/Sema/MultiplexExternalSemaSource.h 
b/clang/include/clang/Sema/MultiplexExternalSemaSource.h
index 2bf91cb5212c5eb..6054ef39e54ff9a 100644
--- a/clang/include/clang/Sema/MultiplexExternalSemaSource.h
+++ b/clang/include/clang/Sema/MultiplexExternalSemaSource.h
@@ -319,6 +319,9 @@ class MultiplexExternalSemaSource : public 
ExternalSemaSource {
   void ReadPendingInstantiations(
  SmallVectorImpl >& Pending) 
override;
 
+  virtual void ReadPendingInstantiationsOfConstexprEntity(
+  const NamedDecl *D, llvm::SmallSetVector &Decls) 
override;
+
   /// Read the set of late parsed template functions for this source.
   ///
   /// The external source should insert its own late parsed template functions

diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index e75a8bdb1fc72ff..6de1a098e067a38 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -59,6 +59,7 @@
 #include "clang/Sema/TypoCorrection.h"
 #include "clang/Sema/Weak.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -10087,6 +10088,12 @@ class Sema final {
   /// but have not yet been performed.
   std::deque PendingInstantiations;
 
+  /// Track constexpr functions referenced before they are (lexically) defined.
+  /// The key is the pattern, associated with a list of specialisations that
+  /// need to be instantiated when the pattern is defined.
+  llvm::DenseMap>
+  PendingInstantiationsOfConstexprEntities;
+
   /// Queue of implicit template instantiations that cannot be performed
   /// eagerly.
   SmallVector LateParsedInstantiations;
@@ -10405,

[llvm-branch-commits] [clang] 5891a8f - [clang] Remove extra '; ' in MultiplexExternalSemaSource.cpp (NFC)

2023-11-30 Thread Jie Fu via llvm-branch-commits

Author: Jie Fu
Date: 2023-11-30T15:53:36+08:00
New Revision: 5891a8f7ce0a7b866a5bc06c34092fbdb28dda1c

URL: 
https://github.com/llvm/llvm-project/commit/5891a8f7ce0a7b866a5bc06c34092fbdb28dda1c
DIFF: 
https://github.com/llvm/llvm-project/commit/5891a8f7ce0a7b866a5bc06c34092fbdb28dda1c.diff

LOG: [clang] Remove extra ';' in MultiplexExternalSemaSource.cpp (NFC)

/llvm-project/clang/lib/Sema/MultiplexExternalSemaSource.cpp:317:2:
error: extra ';' outside of a function is incompatible with C++98 
[-Werror,-Wc++98-compat-extra-semi]
};
 ^
1 error generated.

Added: 


Modified: 
clang/lib/Sema/MultiplexExternalSemaSource.cpp

Removed: 




diff  --git a/clang/lib/Sema/MultiplexExternalSemaSource.cpp 
b/clang/lib/Sema/MultiplexExternalSemaSource.cpp
index d0d6a3a866d62d2..100794de60ee03d 100644
--- a/clang/lib/Sema/MultiplexExternalSemaSource.cpp
+++ b/clang/lib/Sema/MultiplexExternalSemaSource.cpp
@@ -314,7 +314,7 @@ void 
MultiplexExternalSemaSource::ReadPendingInstantiationsOfConstexprEntity(
 const NamedDecl *D, llvm::SmallSetVector &Decls) {
   for (size_t i = 0; i < Sources.size(); ++i)
 Sources[i]->ReadPendingInstantiationsOfConstexprEntity(D, Decls);
-};
+}
 
 void MultiplexExternalSemaSource::ReadLateParsedTemplates(
 llvm::MapVector>



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] c0b9269 - [RISCV] Add helper to copy the AVL of another VSETVLIInfo. NFC

2023-11-30 Thread Luke Lau via llvm-branch-commits

Author: Luke Lau
Date: 2023-11-30T15:19:46+08:00
New Revision: c0b926939829d9d4bb6ac5825e62f30960b6ed22

URL: 
https://github.com/llvm/llvm-project/commit/c0b926939829d9d4bb6ac5825e62f30960b6ed22
DIFF: 
https://github.com/llvm/llvm-project/commit/c0b926939829d9d4bb6ac5825e62f30960b6ed22.diff

LOG: [RISCV] Add helper to copy the AVL of another VSETVLIInfo. NFC

Added: 


Modified: 
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Removed: 




diff  --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp 
b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 3bbc85d836c3f4a..3bb648359e39dd6 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -477,6 +477,18 @@ class VSETVLIInfo {
 return AVLImm;
   }
 
+  void setAVL(VSETVLIInfo Info) {
+assert(Info.isValid());
+if (Info.isUnknown())
+  setUnknown();
+else if (Info.hasAVLReg())
+  setAVLReg(Info.getAVLReg());
+else {
+  assert(Info.hasAVLImm());
+  setAVLImm(Info.getAVLImm());
+}
+  }
+
   unsigned getSEW() const { return SEW; }
   RISCVII::VLMUL getVLMUL() const { return VLMul; }
 
@@ -1054,10 +1066,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo 
&Info,
   // TODO: We can probably relax this for immediates.
   if (Demanded.VLZeroness && !Demanded.VLAny && PrevInfo.isValid() &&
   PrevInfo.hasEquallyZeroAVL(Info, *MRI) && Info.hasSameVLMAX(PrevInfo)) {
-if (PrevInfo.hasAVLImm())
-  Info.setAVLImm(PrevInfo.getAVLImm());
-else
-  Info.setAVLReg(PrevInfo.getAVLReg());
+Info.setAVL(PrevInfo);
 return;
   }
 
@@ -1074,10 +1083,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo 
&Info,
   VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
   if (DefInfo.hasSameVLMAX(Info) &&
   (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
-if (DefInfo.hasAVLImm())
-  Info.setAVLImm(DefInfo.getAVLImm());
-else
-  Info.setAVLReg(DefInfo.getAVLReg());
+Info.setAVL(DefInfo);
 return;
   }
 }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] 586986a - [Flang] Add multiline error message support to pass-plugin-not-found (NFC) (#73601)

2023-11-30 Thread via llvm-branch-commits

Author: madanial0
Date: 2023-11-30T01:35:03-05:00
New Revision: 586986a063ee4b9a7490aac102e103bab121c764

URL: 
https://github.com/llvm/llvm-project/commit/586986a063ee4b9a7490aac102e103bab121c764
DIFF: 
https://github.com/llvm/llvm-project/commit/586986a063ee4b9a7490aac102e103bab121c764.diff

LOG: [Flang] Add multiline error message support to pass-plugin-not-found (NFC) 
(#73601)

The error message above spans multiple lines on AIX, so add
`{{[[:space:]].*}}` to the check pattern to match across lines.

Co-authored-by: Mark Danial 

Added: 


Modified: 
flang/test/Driver/pass-plugin-not-found.f90

Removed: 




diff  --git a/flang/test/Driver/pass-plugin-not-found.f90 
b/flang/test/Driver/pass-plugin-not-found.f90
index 08dd29e5dab0683..fc1e690a0cc72bb 100644
--- a/flang/test/Driver/pass-plugin-not-found.f90
+++ b/flang/test/Driver/pass-plugin-not-found.f90
@@ -6,4 +6,4 @@
 ! RUN: not %flang_fc1 -emit-llvm -o /dev/null -fpass-plugin=X.Y %s 2>&1 | 
FileCheck %s --check-prefix=ERROR
 
 ! The exact wording of the error message depends on the system dlerror.
-! ERROR: error: unable to load plugin 'X.Y': 'Could not load library 'X.Y': 
{{.*}}: {{.*}}{{[Nn]}}o such file{{.*}}'
+! ERROR: error: unable to load plugin 'X.Y': 'Could not load library 'X.Y': 
{{.*}}{{[[:space:]].*}}{{.*}}: {{.*}}{{[Nn]}}o such file{{.*}}'



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 68106bd - [Sample Profile Loader] Fix potential invalidated reference (#73181)

2023-11-30 Thread via llvm-branch-commits

Author: William Junda Huang
Date: 2023-11-29T21:48:55Z
New Revision: 68106bd492e294ecf0a7e2829dd9edf6cd72f3ef

URL: 
https://github.com/llvm/llvm-project/commit/68106bd492e294ecf0a7e2829dd9edf6cd72f3ef
DIFF: 
https://github.com/llvm/llvm-project/commit/68106bd492e294ecf0a7e2829dd9edf6cd72f3ef.diff

LOG: [Sample Profile Loader]  Fix potential invalidated reference (#73181)

There is a potential issue in ProfiledCallGraph where pointers to
ProfiledCallGraphNode are used to construct edges, while
ProfiledCallGraphNode instances are being added to a hash map
ProfiledFunctions simultaneously. If rehash happens, those pointers are
invalidated, resulting in undefined behavior/crash. Previously (before
md5phase2) ProfiledFunctions was an llvm::StringMap, which theoretically had
the same issue when rehashing, although it was never observed. This patch
fixes this potential issue by using a backing buffer for
ProfiledCallGraphNode that does not relocate.

Added: 


Modified: 
llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h

Removed: 




diff  --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h 
b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
index 3f986caeb547287..5381ada37fe27e9 100644
--- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
+++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
@@ -140,8 +140,11 @@ class ProfiledCallGraph {
 if (!ProfiledFunctions.count(Name)) {
   // Link to synthetic root to make sure every node is reachable
   // from root. This does not affect SCC order.
-  ProfiledFunctions[Name] = ProfiledCallGraphNode(Name);
-  Root.Edges.emplace(&Root, &ProfiledFunctions[Name], 0);
+  // Store the pointer of the node because the map can be rehashed.
+  auto &Node =
+  ProfiledCallGraphNodeList.emplace_back(ProfiledCallGraphNode(Name));
+  ProfiledFunctions[Name] = &Node;
+  Root.Edges.emplace(&Root, ProfiledFunctions[Name], 0);
 }
   }
 
@@ -152,9 +155,9 @@ class ProfiledCallGraph {
 auto CalleeIt = ProfiledFunctions.find(CalleeName);
 if (CalleeIt == ProfiledFunctions.end())
   return;
-ProfiledCallGraphEdge Edge(&ProfiledFunctions[CallerName],
-   &CalleeIt->second, Weight);
-auto &Edges = ProfiledFunctions[CallerName].Edges;
+ProfiledCallGraphEdge Edge(ProfiledFunctions[CallerName],
+   CalleeIt->second, Weight);
+auto &Edges = ProfiledFunctions[CallerName]->Edges;
 auto EdgeIt = Edges.find(Edge);
 if (EdgeIt == Edges.end()) {
   Edges.insert(Edge);
@@ -193,7 +196,7 @@ class ProfiledCallGraph {
   return;
 
 for (auto &Node : ProfiledFunctions) {
-  auto &Edges = Node.second.Edges;
+  auto &Edges = Node.second->Edges;
   auto I = Edges.begin();
   while (I != Edges.end()) {
 if (I->Weight <= Threshold)
@@ -205,7 +208,9 @@ class ProfiledCallGraph {
   }
 
   ProfiledCallGraphNode Root;
-  HashKeyMap
+  // backing buffer for ProfiledCallGraphNodes.
+  std::list<ProfiledCallGraphNode> ProfiledCallGraphNodeList;
+  HashKeyMap
   ProfiledFunctions;
 };
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] 9ac64ab - [libc++] Remove linux Buildkite builders entirely (#73825)

2023-11-30 Thread via llvm-branch-commits

Author: Eric
Date: 2023-11-29T17:34:45-05:00
New Revision: 9ac64abc02c1f4433931cf0323c12663df02b14e

URL: 
https://github.com/llvm/llvm-project/commit/9ac64abc02c1f4433931cf0323c12663df02b14e
DIFF: 
https://github.com/llvm/llvm-project/commit/9ac64abc02c1f4433931cf0323c12663df02b14e.diff

LOG: [libc++] Remove linux Buildkite builders entirely (#73825)

This removes the Google hosted Linux buildkite builders. We have since
moved all of them over to github actions.

Follow up changes will be sent for android.

Added: 


Modified: 
.github/workflows/libcxx-build-and-test.yaml
libcxx/utils/ci/buildkite-pipeline.yml

Removed: 




diff  --git a/.github/workflows/libcxx-build-and-test.yaml 
b/.github/workflows/libcxx-build-and-test.yaml
index e929c02d11c1d93..5349ddd499c22d1 100644
--- a/.github/workflows/libcxx-build-and-test.yaml
+++ b/.github/workflows/libcxx-build-and-test.yaml
@@ -160,7 +160,11 @@ jobs:
   'generic-no-unicode',
   'generic-no-wide-characters',
   'generic-static',
-  'generic-with_llvm_unwinder'
+  'generic-with_llvm_unwinder',
+  # TODO Find a better place for the benchmark and bootstrapping 
builds to live. They're either very expensive
+  # or don't provide much value since the benchmark run results are 
too noise on the bots.
+  'benchmarks',
+  'bootstrapping-build'
 ]
 machine: [ 'libcxx-runners-8' ]
 std_modules: [ 'OFF' ]

diff  --git a/libcxx/utils/ci/buildkite-pipeline.yml 
b/libcxx/utils/ci/buildkite-pipeline.yml
index 06b0c55e6f3cab5..51b026247fd 100644
--- a/libcxx/utils/ci/buildkite-pipeline.yml
+++ b/libcxx/utils/ci/buildkite-pipeline.yml
@@ -43,10 +43,6 @@ definitions:
 
 # Define agents using YAML anchors to reduce duplication
 agents_definitions:
-  _linux_agent: &linux_agent
-agents:
-  queue: libcxx-builders
-  os: linux
   _windows_agent: &windows_agent
 agents:
   queue: windows
@@ -101,20 +97,6 @@ environment_definitions:
 
 
 steps:
-- label: Bootstrapping build
-  command: libcxx/utils/ci/run-buildbot bootstrapping-build
-  env:
-<<: *common_env
-  <<: *linux_agent
-  <<: *common
-
-- label: Benchmarks
-  command: libcxx/utils/ci/run-buildbot benchmarks
-  env:
-<<: *common_env
-  <<: *linux_agent
-  <<: *common
-
 - group: ':windows: Windows'
   steps:
   - label: Clang-cl (DLL)



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libunwind] be811d1 - [libc++] Run picolibc tests with qemu

2023-11-30 Thread Louis Dionne via llvm-branch-commits

Author: Michael Platings
Date: 2023-11-29T17:21:08-05:00
New Revision: be811d1617654e46f4f4daa82259ae4fad4c8e6a

URL: 
https://github.com/llvm/llvm-project/commit/be811d1617654e46f4f4daa82259ae4fad4c8e6a
DIFF: 
https://github.com/llvm/llvm-project/commit/be811d1617654e46f4f4daa82259ae4fad4c8e6a.diff

LOG: [libc++] Run picolibc tests with qemu

This patch actually runs the tests for picolibc behind an emulator,
removing a few workarounds and increasing coverage.

Differential Revision: https://reviews.llvm.org/D155521

Added: 


Modified: 
libcxx/cmake/caches/Armv7M-picolibc.cmake
libcxx/docs/index.rst
libcxx/test/configs/armv7m-picolibc-libc++.cfg.in
libcxx/test/libcxx/selftest/dsl/dsl.sh.py
libcxx/test/libcxx/selftest/pass.cpp/run-error.pass.cpp
libcxx/test/libcxx/selftest/pass.mm/run-error.pass.mm
libcxx/test/libcxx/selftest/stdin-is-piped.sh.cpp
libcxx/test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp

libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp

libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp

libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp

libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.sh.cpp

libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp

libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp

libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array14.pass.cpp

libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete14.pass.cpp

libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/classic_table.pass.cpp

libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp

libcxx/test/std/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/eval.PR44847.pass.cpp
libcxx/test/std/time/time.clock/time.clock.file/now.pass.cpp
libcxxabi/test/configs/armv7m-picolibc-libc++abi.cfg.in
libunwind/test/configs/armv7m-picolibc-libunwind.cfg.in
libunwind/test/libunwind_02.pass.cpp

Removed: 




diff  --git a/libcxx/cmake/caches/Armv7M-picolibc.cmake 
b/libcxx/cmake/caches/Armv7M-picolibc.cmake
index 6ed1866a50845fe..9f8863943444b65 100644
--- a/libcxx/cmake/caches/Armv7M-picolibc.cmake
+++ b/libcxx/cmake/caches/Armv7M-picolibc.cmake
@@ -29,6 +29,8 @@ set(LIBCXX_ENABLE_STATIC ON CACHE BOOL "")
 set(LIBCXX_ENABLE_THREADS OFF CACHE BOOL "")
 set(LIBCXX_ENABLE_WIDE_CHARACTERS OFF CACHE BOOL "")
 set(LIBCXX_INCLUDE_BENCHMARKS OFF CACHE BOOL "")
+# Long tests are prohibitively slow when run via emulation.
+set(LIBCXX_TEST_PARAMS "long_tests=False" CACHE STRING "")
 set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
 set(LIBUNWIND_ENABLE_SHARED OFF CACHE BOOL "")
 set(LIBUNWIND_ENABLE_STATIC ON CACHE BOOL "")
@@ -36,3 +38,4 @@ set(LIBUNWIND_ENABLE_THREADS OFF CACHE BOOL "")
 set(LIBUNWIND_IS_BAREMETAL ON CACHE BOOL "")
 set(LIBUNWIND_REMEMBER_HEAP_ALLOC ON CACHE BOOL "")
 set(LIBUNWIND_USE_COMPILER_RT ON CACHE BOOL "")
+find_program(QEMU_SYSTEM_ARM qemu-system-arm)

diff  --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst
index 7bb5512beb1f1fc..e8b4a95dbcffae1 100644
--- a/libcxx/docs/index.rst
+++ b/libcxx/docs/index.rst
@@ -133,7 +133,7 @@ Linux i386, x86_64, arm, arm64  Only 
glibc-2.24 and later and no
 Android 5.0+  i386, x86_64, arm, arm64
 Windows   i386, x86_64  Both MSVC and MinGW style 
environments, ABI in MSVC environments is :doc:`unstable 
`
 AIX 7.2TL5+   powerpc, powerpc64
-Embedded (picolibc)   arm   Support for building with 
picolibc is currently work-in-progress
+Embedded (picolibc)   arm
 = = 
 
 Generally speaking, libc++ should work on any platform that provides a fairly 
complete

diff  --git a/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in 
b/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in
index c0d23f136475eba..a39d43aec96aa4a 100644
--- a/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in
+++ b/libcxx/test/configs/armv7m-picolibc-libc++.cfg.in
@@ -26,10 +26,16 @@ config.substitutions.append(('%{link_flags}',
 ' -Wl,--defsym=__ram_size=0x100'
 ' -Wl,--defsym=__stack_size=0x1000'
 ))
+
+config.executor = (
+'@LIBCXX_SOURCE_DIR@/utils/qemu_baremetal.py'
+' --qemu @QEMU_SYSTEM_ARM@'
+' --machine mps2-an385'
+' --cpu cortex-m3')
 config.substitutions.append(('%{exec}',
-'true' # TODO use qemu-system-arm
+'%{executor}'
+' --execdir %T'
 ))
-config.available_features.add('libcxx-fake-executor')
 
 import os, site
 site.addsitedi

[llvm-branch-commits] [llvm] b92bf0d - [RISCV] Disable clang-format around the RISCVISD opcode enum. NFC

2023-11-30 Thread Craig Topper via llvm-branch-commits

Author: Craig Topper
Date: 2023-11-29T14:01:30-08:00
New Revision: b92bf0dad6ec9760e008a0fa22d7dbd0b045c776

URL: 
https://github.com/llvm/llvm-project/commit/b92bf0dad6ec9760e008a0fa22d7dbd0b045c776
DIFF: 
https://github.com/llvm/llvm-project/commit/b92bf0dad6ec9760e008a0fa22d7dbd0b045c776.diff

LOG: [RISCV] Disable clang-format around the RISCVISD opcode enum. NFC

Added: 


Modified: 
llvm/lib/Target/RISCV/RISCVISelLowering.h

Removed: 




diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h 
b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 45200b54595a0b9..486efeb8339ab0b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -25,7 +25,9 @@ namespace llvm {
 class InstructionCost;
 class RISCVSubtarget;
 struct RISCVRegisterInfo;
+
 namespace RISCVISD {
+// clang-format off
 enum NodeType : unsigned {
   FIRST_NUMBER = ISD::BUILTIN_OP_END,
   RET_GLUE,
@@ -421,6 +423,7 @@ enum NodeType : unsigned {
   TH_SWD,
   TH_SDD,
 };
+// clang-format on
 } // namespace RISCVISD
 
 class RISCVTargetLowering : public TargetLowering {



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)

2023-11-30 Thread Graham Hunter via llvm-branch-commits

huntergr-arm wrote:

Stacked PR on top of https://github.com/llvm/llvm-project/pull/73936

https://github.com/llvm/llvm-project/pull/73941
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)

2023-11-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Graham Hunter (huntergr-arm)


Changes

If we have vectorized variants of a function which take linear parameters, we 
should be able to vectorize assuming the strides match.

---

Patch is 26.80 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/73941.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+24) 
- (modified) 
llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll (+228-19) 


``diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab68c..4b6eac56597c232 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7035,6 +7035,30 @@ void 
LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
   ParamsOk = false;
 break;
   }
+  case VFParamKind::OMP_Linear: {
+Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+// Find the stride for the scalar parameter in this loop and see if
+// it matches the stride for the variant.
+// TODO: do we need to figure out the cost of an extract to get the
+// first lane? Or do we hope that it will be folded away?
+ScalarEvolution *SE = PSE.getSE();
+const auto *SAR =
+dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+if (!SAR || SAR->getLoop() != TheLoop) {
+  ParamsOk = false;
+  break;
+}
+
+const SCEVConstant *Step =
+dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+if (!Step ||
+Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+  ParamsOk = false;
+
+break;
+  }
   case VFParamKind::GlobalPredicate:
 UsesMask = true;
 break;
diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ef6b8e1d83f3811..c6faa812187c07f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -9,16 +9,50 @@ define void @test_linear(ptr noalias %a, ptr readnone %b, i64 
%n) #0 {
 ; CHECK-LABEL: define void @test_linear
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label 
[[VECTOR_PH:%.*]]
+; CHECK:   vector.ph:
+; CHECK-NEXT:[[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:[[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:[[TMP4:%.*]] = call  
@llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:[[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement  
poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector  
[[DOTSPLATINSERT]],  poison,  
zeroinitializer
+; CHECK-NEXT:br label [[VECTOR_BODY:%.*]]
+; CHECK:   vector.body:
+; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[VEC_IND:%.*]] = phi  [ [[TMP4]], 
[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[TMP7:%.*]] = getelementptr i64, ptr [[B]],  [[VEC_IND]]
+; CHECK-NEXT:[[TMP8:%.*]] = extractelement  [[TMP7]], 
i32 0
+; CHECK-NEXT:[[TMP9:%.*]] = call  @foo_linear_nomask(ptr 
[[TMP8]])
+; CHECK-NEXT:[[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 
[[INDEX]]
+; CHECK-NEXT:store  [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT:[[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT:[[VEC_IND_NEXT]] = add  [[VEC_IND]], 
[[DOTSPLAT]]
+; CHECK-NEXT:[[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:   middle.block:
+; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label 
[[SCALAR_PH]]
+; CHECK:   scalar.ph:
+; CHECK-NEXT:[[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] 
], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:br label [[F

[llvm-branch-commits] [llvm] [LV] Add support for linear arguments for vector function variants (PR #73941)

2023-11-30 Thread Graham Hunter via llvm-branch-commits

https://github.com/huntergr-arm created 
https://github.com/llvm/llvm-project/pull/73941

If we have vectorized variants of a function which take linear parameters, we 
should be able to vectorize assuming the strides match.

>From 0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea Mon Sep 17 00:00:00 2001
From: Graham Hunter 
Date: Wed, 11 Oct 2023 17:06:09 +0100
Subject: [PATCH] [LV] Add support for linear arguments for vector function
 variants

If we have vectorized variants of a function which take linear
parameters, we should be able to vectorize assuming the strides
match.
---
 .../Transforms/Vectorize/LoopVectorize.cpp|  24 ++
 .../AArch64/vector-call-linear-args.ll| 247 --
 2 files changed, 252 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab68c..4b6eac56597c232 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7035,6 +7035,30 @@ void 
LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
   ParamsOk = false;
 break;
   }
+  case VFParamKind::OMP_Linear: {
+Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+// Find the stride for the scalar parameter in this loop and see if
+// it matches the stride for the variant.
+// TODO: do we need to figure out the cost of an extract to get the
+// first lane? Or do we hope that it will be folded away?
+ScalarEvolution *SE = PSE.getSE();
+const auto *SAR =
+dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+if (!SAR || SAR->getLoop() != TheLoop) {
+  ParamsOk = false;
+  break;
+}
+
+const SCEVConstant *Step =
+dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+if (!Step ||
+Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+  ParamsOk = false;
+
+break;
+  }
   case VFParamKind::GlobalPredicate:
 UsesMask = true;
 break;
diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ef6b8e1d83f3811..c6faa812187c07f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -9,16 +9,50 @@ define void @test_linear(ptr noalias %a, ptr readnone %b, i64 
%n) #0 {
 ; CHECK-LABEL: define void @test_linear
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label 
[[VECTOR_PH:%.*]]
+; CHECK:   vector.ph:
+; CHECK-NEXT:[[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:[[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:[[TMP4:%.*]] = call  
@llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:[[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement  
poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector  
[[DOTSPLATINSERT]],  poison,  
zeroinitializer
+; CHECK-NEXT:br label [[VECTOR_BODY:%.*]]
+; CHECK:   vector.body:
+; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[VEC_IND:%.*]] = phi  [ [[TMP4]], 
[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[TMP7:%.*]] = getelementptr i64, ptr [[B]],  [[VEC_IND]]
+; CHECK-NEXT:[[TMP8:%.*]] = extractelement  [[TMP7]], 
i32 0
+; CHECK-NEXT:[[TMP9:%.*]] = call  @foo_linear_nomask(ptr 
[[TMP8]])
+; CHECK-NEXT:[[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 
[[INDEX]]
+; CHECK-NEXT:store  [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT:[[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT:[[VEC_IND_NEXT]] = add  [[VEC_IND]], 
[[DOTSPLAT]]
+; CHECK-NEXT:[[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:   middle.block:
+; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_COND_

[llvm-branch-commits] [llvm] 0cc0f46 - [LV] Add support for linear arguments for vector function variants

2023-11-30 Thread Graham Hunter via llvm-branch-commits

Author: Graham Hunter
Date: 2023-11-30T13:39:06Z
New Revision: 0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea

URL: 
https://github.com/llvm/llvm-project/commit/0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea
DIFF: 
https://github.com/llvm/llvm-project/commit/0cc0f46e6626d73b3a7cc107ddb128ec060ea0ea.diff

LOG: [LV] Add support for linear arguments for vector function variants

If we have vectorized variants of a function which take linear
parameters, we should be able to vectorize assuming the strides
match.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09a6e01226ab68c..4b6eac56597c232 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7035,6 +7035,30 @@ void 
LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
   ParamsOk = false;
 break;
   }
+  case VFParamKind::OMP_Linear: {
+Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
+// Find the stride for the scalar parameter in this loop and see if
+// it matches the stride for the variant.
+// TODO: do we need to figure out the cost of an extract to get the
+// first lane? Or do we hope that it will be folded away?
+ScalarEvolution *SE = PSE.getSE();
+const auto *SAR =
+dyn_cast<SCEVAddRecExpr>(SE->getSCEV(ScalarParam));
+
+if (!SAR || SAR->getLoop() != TheLoop) {
+  ParamsOk = false;
+  break;
+}
+
+const SCEVConstant *Step =
+dyn_cast<SCEVConstant>(SAR->getStepRecurrence(*SE));
+
+if (!Step ||
+Step->getAPInt().getSExtValue() != Param.LinearStepOrPos)
+  ParamsOk = false;
+
+break;
+  }
   case VFParamKind::GlobalPredicate:
 UsesMask = true;
 break;

diff  --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ef6b8e1d83f3811..c6faa812187c07f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -9,16 +9,50 @@ define void @test_linear(ptr noalias %a, ptr readnone %b, i64 
%n) #0 {
 ; CHECK-LABEL: define void @test_linear
 ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label 
[[VECTOR_PH:%.*]]
+; CHECK:   vector.ph:
+; CHECK-NEXT:[[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:[[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:[[TMP4:%.*]] = call  
@llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:[[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement  
poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector  
[[DOTSPLATINSERT]],  poison,  
zeroinitializer
+; CHECK-NEXT:br label [[VECTOR_BODY:%.*]]
+; CHECK:   vector.body:
+; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[VEC_IND:%.*]] = phi  [ [[TMP4]], 
[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[TMP7:%.*]] = getelementptr i64, ptr [[B]],  [[VEC_IND]]
+; CHECK-NEXT:[[TMP8:%.*]] = extractelement  [[TMP7]], 
i32 0
+; CHECK-NEXT:[[TMP9:%.*]] = call  @foo_linear_nomask(ptr 
[[TMP8]])
+; CHECK-NEXT:[[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 
[[INDEX]]
+; CHECK-NEXT:store  [[TMP9]], ptr [[TMP10]], align 8
+; CHECK-NEXT:[[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:[[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT:[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT:[[VEC_IND_NEXT]] = add  [[VEC_IND]], 
[[DOTSPLAT]]
+; CHECK-NEXT:[[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:   middle.block:
+; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:

[llvm-branch-commits] [llvm] 557b422 - [LV] Linear argument tests for vectorization of function calls

2023-11-30 Thread Graham Hunter via llvm-branch-commits

Author: Graham Hunter
Date: 2023-11-30T13:18:06Z
New Revision: 557b422bbcb5c2f2051c806a99c8d2e249717525

URL: 
https://github.com/llvm/llvm-project/commit/557b422bbcb5c2f2051c806a99c8d2e249717525
DIFF: 
https://github.com/llvm/llvm-project/commit/557b422bbcb5c2f2051c806a99c8d2e249717525.diff

LOG: [LV] Linear argument tests for vectorization of function calls

Added: 
llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll

Modified: 


Removed: 




diff  --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
new file mode 100644
index 000..ef6b8e1d83f3811
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -0,0 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 2
+; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 
-S | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; A call whose argument can remain a scalar because it's sequential and only 
the
+; starting value is required.
+define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:br label [[FOR_BODY:%.*]]
+; CHECK:   for.body:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:[[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label 
[[FOR_BODY]]
+; CHECK:   for.cond.cleanup:
+; CHECK-NEXT:ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+  %call = call i64 @foo(ptr %gepb) #1
+  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %call, ptr %gepa
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_linear_with_mask(ptr noalias %a, ptr readnone %b, i64 %n) #0 
{
+; CHECK-LABEL: define void @test_linear_with_mask
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) 
#[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:br label [[FOR_BODY:%.*]]
+; CHECK:   for.body:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:[[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:[[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:[[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:store i64 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:[[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label 
[[FOR_BODY]]
+; CHECK:   for.cond.cleanup:
+; CHECK-NEXT:ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+  %call = call i64 @foo(ptr %gepb) #2
+  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %call, ptr %gepa
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr 
readonly %c, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear_with_vector
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly 
[[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:br label [[FOR_BODY:%.*]]
+; CHECK:   for.body:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:[[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 
[[INDVARS_IV]]
+; CHECK-NEXT:[[DATA:%.*]] = load i32, ptr [[GEPC]], align 8
+; CHECK-NEXT:[[GEPB:%.*]] = geteleme