This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 356cb57bb9 Fix the 8-bit vector load/store problem, which resolves
the issue raised in the codegen test for CUDA (#18398)
356cb57bb9 is described below
commit 356cb57bb9a9f46293c6786052ed10780d305fe0
Author: hantao-zhou <[email protected]>
AuthorDate: Mon Oct 27 07:00:35 2025 +0800
Fix the 8-bit vector load/store problem, which resolves the issue
raised in the codegen test for CUDA (#18398)
* Fix the 8-bit vector loads/stores so that each lane is addressed using
reinterpret_cast byte indexing instead of the previous rolled bit packing,
which could omit certain bits.
* fix clang format
---
src/target/source/codegen_cuda.cc | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/src/target/source/codegen_cuda.cc
b/src/target/source/codegen_cuda.cc
index defc94efa2..9565eba5d4 100644
--- a/src/target/source/codegen_cuda.cc
+++ b/src/target/source/codegen_cuda.cc
@@ -640,12 +640,12 @@ void CodeGenCUDA::PrintVecElemLoad(const std::string&
vec, DataType t, int i,
static const char access[] = {'x', 'y', 'z', 'w'};
ICHECK(i >= 0 && i < (t.bits() == 8 ? 16 : (t.bits() == 16 || t.bits() ==
32) ? 8 : 4));
if (t.bits() == 8 && (t.is_int() || t.is_uint())) {
- std::string type_name = t.is_int() ? "char" : "unsigned char";
+ std::string type_name = t.is_int() ? "signed char" : "unsigned char";
if (t.lanes() == 2 || t.lanes() == 3) {
os << vec << "." << access[i % t.lanes()];
} else {
std::string ac = t.lanes() == 4 ? vec : (vec + "." + access[i / 4]);
- os << "((" << type_name << ")(" << ac << " >> " << i % 4 * 8 << "))";
+ os << "(reinterpret_cast<const " << type_name << "*>(&(" << ac << "))["
<< (i % 4) << "])";
}
} else if (t.is_float16()) {
if (t.lanes() <= 4) {
@@ -697,12 +697,9 @@ void CodeGenCUDA::PrintVecElemStore(const std::string&
vec, DataType t, int i,
<< "(" << value << ");\n";
} else {
std::string ac = t.lanes() == 4 ? vec : (vec + "." + access[i / 4]);
- stream << ac << "=";
- // Do not read the first undef lane.
- if (i != 0) {
- stream << ac << " & ~(0x000000ff << " << i % 4 * 8 << ") |";
- }
- stream << "(" << value << " << " << i % 4 * 8 << ");\n";
+ std::string type_name = t.is_int() ? "signed char" : "unsigned char";
+ stream << "reinterpret_cast<" << type_name << "*>(&(" << ac << "))[" <<
(i % 4) << "] = ("
+ << type_name << ")(" << value << ");\n";
}
} else if (t.is_float16()) {
if (t.lanes() <= 4) {