This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 356cb57bb9 fix the 8-bit vector loads/stores problem, which will 
solve the problem raised in the codegen test for cuda (#18398)
356cb57bb9 is described below

commit 356cb57bb9a9f46293c6786052ed10780d305fe0
Author: hantao-zhou <[email protected]>
AuthorDate: Mon Oct 27 07:00:35 2025 +0800

    fix the 8-bit vector loads/stores problem, which will solve the problem 
raised in the codegen test for cuda (#18398)
    
    * Fix 8-bit vector loads/stores so that each lane is addressed using 
reinterpret_cast byte indexing instead of hand-rolled shift-and-mask bit 
packing, which omits certain bits.
    
    * fix clang format
---
 src/target/source/codegen_cuda.cc | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/target/source/codegen_cuda.cc 
b/src/target/source/codegen_cuda.cc
index defc94efa2..9565eba5d4 100644
--- a/src/target/source/codegen_cuda.cc
+++ b/src/target/source/codegen_cuda.cc
@@ -640,12 +640,12 @@ void CodeGenCUDA::PrintVecElemLoad(const std::string& 
vec, DataType t, int i,
   static const char access[] = {'x', 'y', 'z', 'w'};
   ICHECK(i >= 0 && i < (t.bits() == 8 ? 16 : (t.bits() == 16 || t.bits() == 
32) ? 8 : 4));
   if (t.bits() == 8 && (t.is_int() || t.is_uint())) {
-    std::string type_name = t.is_int() ? "char" : "unsigned char";
+    std::string type_name = t.is_int() ? "signed char" : "unsigned char";
     if (t.lanes() == 2 || t.lanes() == 3) {
       os << vec << "." << access[i % t.lanes()];
     } else {
       std::string ac = t.lanes() == 4 ? vec : (vec + "." + access[i / 4]);
-      os << "((" << type_name << ")(" << ac << " >> " << i % 4 * 8 << "))";
+      os << "(reinterpret_cast<const " << type_name << "*>(&(" << ac << "))[" 
<< (i % 4) << "])";
     }
   } else if (t.is_float16()) {
     if (t.lanes() <= 4) {
@@ -697,12 +697,9 @@ void CodeGenCUDA::PrintVecElemStore(const std::string& 
vec, DataType t, int i,
              << "(" << value << ");\n";
     } else {
       std::string ac = t.lanes() == 4 ? vec : (vec + "." + access[i / 4]);
-      stream << ac << "=";
-      // Do not read the first undef lane.
-      if (i != 0) {
-        stream << ac << " & ~(0x000000ff << " << i % 4 * 8 << ") |";
-      }
-      stream << "(" << value << " << " << i % 4 * 8 << ");\n";
+      std::string type_name = t.is_int() ? "signed char" : "unsigned char";
+      stream << "reinterpret_cast<" << type_name << "*>(&(" << ac << "))[" << 
(i % 4) << "] = ("
+             << type_name << ")(" << value << ");\n";
     }
   } else if (t.is_float16()) {
     if (t.lanes() <= 4) {

Reply via email to