[clang] [llvm] [RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (PR #139190)
https://github.com/topperc closed https://github.com/llvm/llvm-project/pull/139190 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (PR #139190)
https://github.com/topperc edited https://github.com/llvm/llvm-project/pull/139190 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (PR #139190)
@@ -1476,8 +1486,14 @@ CoerceScalableToFixed(CodeGenFunction &CGF,
llvm::FixedVectorType *ToTy,
// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, first bitcast the source.
if (FromTy->getElementType()->isIntegerTy(1) &&
- FromTy->getElementCount().isKnownMultipleOf(8) &&
ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+if (!FromTy->getElementCount().isKnownMultipleOf(8)) {
+ FromTy = llvm::ScalableVectorType::get(
+ FromTy->getElementType(),
+ llvm::alignTo<8>(FromTy->getElementCount().getKnownMinValue()));
+ llvm::Value *ZeroVec = llvm::Constant::getNullValue(FromTy);
nikic wrote:
@nunoplopes is correct, there is no bitwise poison in LLVM. That particular bit
of phrasing in the bitcast docs probably predates the introduction of poison.
https://github.com/llvm/llvm-project/pull/139190
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (PR #139190)
@@ -1476,8 +1486,14 @@ CoerceScalableToFixed(CodeGenFunction &CGF,
llvm::FixedVectorType *ToTy,
// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, first bitcast the source.
if (FromTy->getElementType()->isIntegerTy(1) &&
- FromTy->getElementCount().isKnownMultipleOf(8) &&
ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+if (!FromTy->getElementCount().isKnownMultipleOf(8)) {
+ FromTy = llvm::ScalableVectorType::get(
+ FromTy->getElementType(),
+ llvm::alignTo<8>(FromTy->getElementCount().getKnownMinValue()));
+ llvm::Value *ZeroVec = llvm::Constant::getNullValue(FromTy);
paulwalker-arm wrote:
You say "a missed optimization", I say "it has implemented the LangRef" :)
https://github.com/llvm/llvm-project/pull/139190
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (PR #139190)
@@ -1476,8 +1486,14 @@ CoerceScalableToFixed(CodeGenFunction &CGF,
llvm::FixedVectorType *ToTy,
// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, first bitcast the source.
if (FromTy->getElementType()->isIntegerTy(1) &&
- FromTy->getElementCount().isKnownMultipleOf(8) &&
ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+if (!FromTy->getElementCount().isKnownMultipleOf(8)) {
+ FromTy = llvm::ScalableVectorType::get(
+ FromTy->getElementType(),
+ llvm::alignTo<8>(FromTy->getElementCount().getKnownMinValue()));
+ llvm::Value *ZeroVec = llvm::Constant::getNullValue(FromTy);
nunoplopes wrote:
That is just a missed optimization. Offering bit-level poison semantics is too
complicated. No one managed to come up with a proposal for that. So, we have to
go with value-wise poison semantics for the foreseeable future.
https://github.com/llvm/llvm-project/pull/139190
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (PR #139190)
@@ -1476,8 +1486,14 @@ CoerceScalableToFixed(CodeGenFunction &CGF,
llvm::FixedVectorType *ToTy,
// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, first bitcast the source.
if (FromTy->getElementType()->isIntegerTy(1) &&
- FromTy->getElementCount().isKnownMultipleOf(8) &&
ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+if (!FromTy->getElementCount().isKnownMultipleOf(8)) {
+ FromTy = llvm::ScalableVectorType::get(
+ FromTy->getElementType(),
+ llvm::alignTo<8>(FromTy->getElementCount().getKnownMinValue()));
+ llvm::Value *ZeroVec = llvm::Constant::getNullValue(FromTy);
paulwalker-arm wrote:
If it's a choice between the LangRef and an old mailing list post then I think
the LangRef should win. At least the bitcast part of the LangRef suggests this
is not true, which is backed up by https://godbolt.org/z/n67WTcf1v that shows
effort is put in to preserving the known bits?
https://github.com/llvm/llvm-project/pull/139190
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (PR #139190)
https://github.com/paulwalker-arm approved this pull request. This looks good to me. Thanks for helping to unblock https://github.com/llvm/llvm-project/pull/130973 @topperc. https://github.com/llvm/llvm-project/pull/139190 ___ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (PR #139190)
@@ -1476,8 +1486,14 @@ CoerceScalableToFixed(CodeGenFunction &CGF,
llvm::FixedVectorType *ToTy,
// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, first bitcast the source.
if (FromTy->getElementType()->isIntegerTy(1) &&
- FromTy->getElementCount().isKnownMultipleOf(8) &&
ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+if (!FromTy->getElementCount().isKnownMultipleOf(8)) {
+ FromTy = llvm::ScalableVectorType::get(
+ FromTy->getElementType(),
+ llvm::alignTo<8>(FromTy->getElementCount().getKnownMinValue()));
+ llvm::Value *ZeroVec = llvm::Constant::getNullValue(FromTy);
nunoplopes wrote:
Yes, that's the semantics. A single poison bit taints the whole value.
So when you bitcast a vector to elements with different sizes, you need to
account for the lanes that share the original lane.
In summary, you can't combine poison elements into a lane that is meaningful
for you.
https://github.com/llvm/llvm-project/pull/139190
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [RISCV] Improve casting between i1 scalable vectors and i8 fixed vectors for -mrvv-vector-bits (PR #139190)
https://github.com/topperc updated
https://github.com/llvm/llvm-project/pull/139190
>From 86692b0229da44dce5321b00c8409e50de86efaf Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 8 May 2025 15:13:47 -0700
Subject: [PATCH 1/2] [RISCV] Improve casting between i1 scalable vectors and
i8 fixed vectors for -mrvv-vector-bits
For i1 vectors, we used an i8 fixed vector as the storage type.
If the known minimum number of elements of the scalable vector type
is less than 8, we were doing the cast through memory. This used a
load or store from a fixed vector alloca. For a <vscale x X x i1> type where X is less than 8, DataLayout
indicates that the load/store reads/writes vscale bytes even if vscale is
known and vscale*X is less than or equal to 8. This means the load or store
is outside the bounds of the fixed size alloca as far as DataLayout is
concerned leading to undefined behavior.
This patch avoids this by widening the i1 scalable vector type with
zero elements until it is divisible by 8. This allows it to be bitcast
to/from an i8 scalable vector. We then insert or extract the i8 fixed
vector into this type.
Hopefully this enables #130973 to be accepted.
---
clang/lib/CodeGen/CGCall.cpp | 26 -
clang/lib/CodeGen/CGExprScalar.cpp| 27 -
.../attr-riscv-rvv-vector-bits-less-8-call.c | 104 +++---
.../attr-riscv-rvv-vector-bits-less-8-cast.c | 56 ++
.../attr-rvv-vector-bits-bitcast-less-8.c | 32 +++---
.../CodeGen/RISCV/attr-rvv-vector-bits-cast.c | 18 +--
.../RISCV/attr-rvv-vector-bits-codegen.c | 37 ---
.../RISCV/attr-rvv-vector-bits-globals.c | 16 +--
8 files changed, 119 insertions(+), 197 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 9dfd25f9a8d43..81dfc3884f1af 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1366,19 +1366,29 @@ static llvm::Value *CreateCoercedLoad(Address Src,
llvm::Type *Ty,
// If we are casting a fixed i8 vector to a scalable i1 predicate
// vector, use a vector insert and bitcast the result.
if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
- ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
FixedSrcTy->getElementType()->isIntegerTy(8)) {
ScalableDstTy = llvm::ScalableVectorType::get(
FixedSrcTy->getElementType(),
-ScalableDstTy->getElementCount().getKnownMinValue() / 8);
+llvm::divideCeil(
+ScalableDstTy->getElementCount().getKnownMinValue(), 8));
}
if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) {
auto *Load = CGF.Builder.CreateLoad(Src);
auto *PoisonVec = llvm::PoisonValue::get(ScalableDstTy);
llvm::Value *Result = CGF.Builder.CreateInsertVector(
ScalableDstTy, PoisonVec, Load, uint64_t(0), "cast.scalable");
-if (ScalableDstTy != Ty)
- Result = CGF.Builder.CreateBitCast(Result, Ty);
+ScalableDstTy = cast(Ty);
+if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+!ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+FixedSrcTy->getElementType()->isIntegerTy(8))
+ ScalableDstTy = llvm::ScalableVectorType::get(
+ ScalableDstTy->getElementType(),
+ llvm::alignTo<8>(
+ ScalableDstTy->getElementCount().getKnownMinValue()));
+if (Result->getType() != ScalableDstTy)
+ Result = CGF.Builder.CreateBitCast(Result, ScalableDstTy);
+if (Result->getType() != Ty)
+ Result = CGF.Builder.CreateExtractVector(Ty, Result, uint64_t(0));
return Result;
}
}
@@ -1476,8 +1486,14 @@ CoerceScalableToFixed(CodeGenFunction &CGF,
llvm::FixedVectorType *ToTy,
// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, first bitcast the source.
if (FromTy->getElementType()->isIntegerTy(1) &&
- FromTy->getElementCount().isKnownMultipleOf(8) &&
ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+if (!FromTy->getElementCount().isKnownMultipleOf(8)) {
+ FromTy = llvm::ScalableVectorType::get(
+ FromTy->getElementType(),
+ llvm::alignTo<8>(FromTy->getElementCount().getKnownMinValue()));
+ llvm::Value *ZeroVec = llvm::Constant::getNullValue(FromTy);
+ V = CGF.Builder.CreateInsertVector(FromTy, ZeroVec, V, uint64_t(0));
+}
FromTy = llvm::ScalableVectorType::get(
ToTy->getElementType(),
FromTy->getElementCount().getKnownMinValue() / 8);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp
b/clang/lib/CodeGen/CGExprScalar.cpp
index f639a87e3ad0b..7639b8518db6e 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2492,18 +2492,28 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
// If we are casting a fixed i8 vector to a scalable i1 predicate
// vector, use
