[clang] [CIR] Upstream SizeOf for VariableArrayType (PR #169993)

Amr Hesham via cfe-commits Tue, 02 Dec 2025 09:42:14 -0800

https://github.com/AmrDeveloper updated 
https://github.com/llvm/llvm-project/pull/169993


>From e279be02566f68b680437b7390542f16bf0b06e3 Mon Sep 17 00:00:00 2001
From: Amr Hesham <[email protected]>
Date: Sat, 29 Nov 2025 13:38:41 +0100
Subject: [PATCH 1/2] [CIR] Upstream SizeOf for VariableArrayType

---
 clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp |  30 ++--
 clang/test/CIR/CodeGen/size-of-vla.cpp     | 156 +++++++++++++++++++++
 2 files changed, 173 insertions(+), 13 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/size-of-vla.cpp

diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index a8c2061ddbd6c..c8b70c7ba46ae 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -2344,25 +2344,29 @@ mlir::Value 
ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
         } else {
           // C99 6.5.3.4p2: If the argument is an expression of type
           // VLA, it is evaluated.
-          cgf.getCIRGenModule().errorNYI(
-              e->getSourceRange(),
-              "sizeof operator for VariableArrayType & evaluateExtent "
-              "ignoredExpr",
-              e->getStmtClassName());
-          return {};
+          cgf.emitIgnoredExpr(e->getArgumentExpr());
         }
 
         // For _Countof, we just want to return the size of a single dimension.
         if (kind == UETT_CountOf)
           return cgf.getVLAElements1D(vat).numElts;
 
-        cgf.getCIRGenModule().errorNYI(
-            e->getSourceRange(),
-            "sizeof operator for VariableArrayType & evaluateExtent",
-            e->getStmtClassName());
-        return builder.getConstant(
-            loc, cir::IntAttr::get(cgf.cgm.uInt64Ty,
-                                   -llvm::APSInt(llvm::APInt(64, 1), true)));
+        // For sizeof and __datasizeof, we need to scale the number of elements
+        // by the size of the array element type.
+        auto vlaSize = cgf.getVLASize(vat);
+        mlir::Value numElts = vlaSize.numElts;
+
+        // Scale the number of non-VLA elements by the non-VLA element size.
+        CharUnits eltSize = cgf.getContext().getTypeSizeInChars(vlaSize.type);
+        if (!eltSize.isOne()) {
+          mlir::Location loc = cgf.getLoc(e->getSourceRange());
+          mlir::Value eltSizeValue =
+              builder.getConstAPInt(numElts.getLoc(), numElts.getType(),
+                                    cgf.cgm.getSize(eltSize).getValue());
+          return builder.createMul(loc, eltSizeValue, numElts);
+        }
+
+        return numElts;
       }
     }
   } else if (e->getKind() == UETT_OpenMPRequiredSimdAlign) {
diff --git a/clang/test/CIR/CodeGen/size-of-vla.cpp 
b/clang/test/CIR/CodeGen/size-of-vla.cpp
new file mode 100644
index 0000000000000..f87ede006643b
--- /dev/null
+++ b/clang/test/CIR/CodeGen/size-of-vla.cpp
@@ -0,0 +1,156 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value 
-fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value 
-fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value 
-emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+void vla_type_with_element_type_of_size_1() {
+  unsigned long n = 10ul;
+  unsigned long size = sizeof(bool[n]);
+}
+
+// CIR: %[[N_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["n", init]
+// CIR: %[[SIZE_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["size", init]
+// CIR: %[[CONST_10:.*]] = cir.const #cir.int<10> : !u64i
+// CIR: cir.store {{.*}} %[[CONST_10]], %[[N_ADDR]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[TMP_N:.*]] = cir.load {{.*}} %[[N_ADDR]] : !cir.ptr<!u64i>, !u64i
+// CIR: cir.store {{.*}} %[[TMP_N]], %[[SIZE_ADDR]] : !u64i, !cir.ptr<!u64i>
+
+// LLVM: %[[N_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[SIZE_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: store i64 10, ptr %[[N_ADDR]], align 8
+// LLVM: %[[TMP_N:.*]] = load i64, ptr %[[N_ADDR]], align 8
+// LLVM: store i64 %[[TMP_N]], ptr %[[SIZE_ADDR]], align 8
+
+// OGCG: %[[N_ADDR:.*]] = alloca i64, align 8
+// OGCG: %[[SIZE_ADDR:.*]] = alloca i64, align 8
+// OGCG: store i64 10, ptr %[[N_ADDR]], align 8
+// OGCG: %[[TMP_N:.*]] = load i64, ptr %[[N_ADDR]], align 8
+// OGCG: store i64 %[[TMP_N]], ptr %[[SIZE_ADDR]], align 8
+
+void vla_type_with_element_type_int() {
+  unsigned long n = 10ul;
+  unsigned long size = sizeof(int[n]);
+}
+
+// CIR: %[[N_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["n", init]
+// CIR: %[[SIZE_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["size", init]
+// CIR: %[[CONST_10:.*]] = cir.const #cir.int<10> : !u64i
+// CIR: cir.store {{.*}} %[[CONST_10]], %[[N_ADDR]] : !u64i, !cir.ptr<!u64i>
+// CIR: %3 = cir.load {{.*}} %[[N_ADDR]] : !cir.ptr<!u64i>, !u64i
+// CIR: %[[CONST_4:.*]] = cir.const #cir.int<4> : !u64i
+// CIR: %[[SIZE:.*]] = cir.binop(mul, %[[CONST_4]], %3) : !u64i
+// CIR: cir.store {{.*}} %[[SIZE]], %[[SIZE_ADDR]] : !u64i, !cir.ptr<!u64i>
+
+// LLVM: %[[N_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[SIZE_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: store i64 10, ptr %[[N_ADDR]], align 8
+// LLVM: %[[TMP_N:.*]] = load i64, ptr %[[N_ADDR]], align 8
+// LLVM: %[[SIZE:.*]] = mul i64 4, %[[TMP_N]]
+// LLVM: store i64 %[[SIZE]], ptr %[[SIZE_ADDR]], align 8
+
+// OGCG: %[[N_ADDR:.*]] = alloca i64, align 8
+// OGCG: %[[SIZE_ADDR:.*]] = alloca i64, align 8
+// OGCG: store i64 10, ptr %[[N_ADDR]], align 8
+// OGCG: %[[TMP_N:.*]] = load i64, ptr %[[N_ADDR]], align 8
+// OGCG: %[[SIZE:.*]] = mul nuw i64 4, %[[TMP_N]]
+// OGCG: store i64 %[[SIZE]], ptr %[[SIZE_ADDR]], align 8
+
+void vla_expr_element_type_of_size_1() {
+  unsigned long n = 10ul;
+  bool arr[n];
+  unsigned long size = sizeof(arr);
+}
+
+// CIR: %[[N_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["n", init]
+// CIR: %[[SAVED_STACK_ADDR:.*]] = cir.alloca !cir.ptr<!u8i>, 
!cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"]
+// CIR: %[[CONST_10:.*]] = cir.const #cir.int<10> : !u64i
+// CIR: cir.store {{.*}} %[[CONST_10]], %[[N_ADDR]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[TMP_N:.*]] = cir.load {{.*}} %[[N_ADDR]] : !cir.ptr<!u64i>, !u64i
+// CIR: %[[STACK_SAVE:.*]] = cir.stacksave : !cir.ptr<!u8i>
+// CIR: cir.store {{.*}} %[[STACK_SAVE]], %[[SAVED_STACK_ADDR]] : 
!cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>
+// CIR: %[[ARR_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, 
%[[TMP_N]] : !u64i, ["arr"]
+// CIR: %[[SIZE_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["size", init]
+// CIR: cir.store {{.*}} %[[TMP_N]], %[[SIZE_ADDR]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[TMP_SAVED_STACK:.*]] = cir.load {{.*}} %[[SAVED_STACK_ADDR]] : 
!cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
+// CIR: cir.stackrestore %[[TMP_SAVED_STACK]] : !cir.ptr<!u8i>
+
+// LLVM: %[[N_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[SAVED_STACK_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM: store i64 10, ptr %[[N_ADDR]], align 8
+// LLVM: %[[TMP_N:.*]] = load i64, ptr %[[N_ADDR]], align 8
+// LLVM: %[[STACK_SAVE:.*]] = call ptr @llvm.stacksave.p0()
+// LLVM: store ptr %[[STACK_SAVE]], ptr %[[SAVED_STACK_ADDR]], align 8
+// LLVM: %[[ARR_ADDR:.*]] = alloca i8, i64 %[[TMP_N]], align 16
+// LLVM: %[[SIZE_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: store i64 %[[TMP_N]], ptr %[[SIZE_ADDR]], align 8
+// LLVM: %[[TMP_SAVED_STACK:.*]] = load ptr, ptr %[[SAVED_STACK_ADDR]], align 8
+// LLVM: call void @llvm.stackrestore.p0(ptr %[[TMP_SAVED_STACK]])
+
+// Note: VLA_EXPR0 below is emitted to capture debug info.
+
+// OGCG: %[[N_ADDR:.*]] = alloca i64, align 8
+// OGCG: %[[SAVED_STACK_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[VLA_EXPR0:.*]] = alloca i64, align 8
+// OGCG: %[[SIZE_ADDR:.*]] = alloca i64, align 8
+// OGCG: store i64 10, ptr %[[N_ADDR]], align 8
+// OGCG: %[[TMP_N:.*]] = load i64, ptr %[[N_ADDR]], align 8
+// OGCG: %[[STACK_SAVE:.*]] = call ptr @llvm.stacksave.p0()
+// OGCG: store ptr %[[STACK_SAVE]], ptr %[[SAVED_STACK_ADDR]], align 8
+// OGCG: %[[ARR_ADDR:.*]] = alloca i8, i64 %[[TMP_N]], align 16
+// OGCG: store i64 %[[TMP_N]], ptr %[[VLA_EXPR0]], align 8
+// OGCG: store i64 %[[TMP_N]], ptr %[[SIZE_ADDR]], align 8
+// OGCG: %[[TMP_SAVED_STACK:.*]] = load ptr, ptr %[[SAVED_STACK_ADDR]], align 8
+// OGCG: call void @llvm.stackrestore.p0(ptr %[[TMP_SAVED_STACK]])
+
+void vla_expr_element_type_int() {
+  unsigned long n = 10ul;
+  int arr[n];
+  unsigned long size = sizeof(arr);
+}
+
+// CIR: %[[N_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["n", init]
+// CIR: %[[SAVED_STACK_ADDR:.*]] = cir.alloca !cir.ptr<!u8i>, 
!cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"]
+// CIR: %[[CONST_10:.*]] = cir.const #cir.int<10> : !u64i
+// CIR: cir.store {{.*}} %[[CONST_10]], %[[N_ADDR]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[TMP_N:.*]] = cir.load {{.*}} %[[N_ADDR]] : !cir.ptr<!u64i>, !u64i
+// CIR: %[[STACK_SAVE:.*]] = cir.stacksave : !cir.ptr<!u8i>
+// CIR: cir.store {{.*}} %[[STACK_SAVE]], %[[SAVED_STACK_ADDR]] : 
!cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>
+// CIR: %[[ARR_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, %[[TMP_N]] : 
!u64i, ["arr"]
+// CIR: %[[SIZE_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["size", init]
+// CIR: %[[CONST_4:.*]] = cir.const #cir.int<4> : !u64i
+// CIR: %[[SIZE:.*]] = cir.binop(mul, %[[CONST_4]], %[[TMP_N]]) : !u64i
+// CIR: cir.store {{.*}} %[[SIZE]], %[[SIZE_ADDR]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[TMP_SAVED_STACK:.*]] = cir.load {{.*}} %[[SAVED_STACK_ADDR]] : 
!cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
+// CIR: cir.stackrestore %[[TMP_SAVED_STACK]] : !cir.ptr<!u8i>
+
+// LLVM: %[[N_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[SAVED_STACK_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM: store i64 10, ptr %[[N_ADDR]], align 8
+// LLVM: %[[TMP_N:.*]] = load i64, ptr %[[N_ADDR]], align 8
+// LLVM: %[[STACK_SAVE:.*]] = call ptr @llvm.stacksave.p0()
+// LLVM: store ptr %[[STACK_SAVE]], ptr %[[SAVED_STACK_ADDR]], align 8
+// LLVM: %[[ARR_ADDR:.*]] = alloca i32, i64 %[[TMP_N]], align 16
+// LLVM: %[[SIZE_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[SIZE:.*]] = mul i64 4, %[[TMP_N]]
+// LLVM: store i64 %[[SIZE]], ptr %[[SIZE_ADDR]], align 8
+// LLVM: %[[TMP_SAVED_STACK:.*]] = load ptr, ptr %[[SAVED_STACK_ADDR]], align 8
+// LLVM: call void @llvm.stackrestore.p0(ptr %[[TMP_SAVED_STACK]])
+
+// Note: VLA_EXPR0 below is emitted to capture debug info.
+
+// OGCG: %[[N_ADDR:.*]] = alloca i64, align 8
+// OGCG: %[[SAVED_STACK_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[VLA_EXPR0:.*]] = alloca i64, align 8
+// OGCG: %[[SIZE_ADDR:.*]] = alloca i64, align 8
+// OGCG: store i64 10, ptr %[[N_ADDR]], align 8
+// OGCG: %[[TMP_N:.*]] = load i64, ptr %[[N_ADDR]], align 8
+// OGCG: %[[STACK_SAVE:.*]] = call ptr @llvm.stacksave.p0()
+// OGCG: store ptr %[[STACK_SAVE]], ptr %[[SAVED_STACK_ADDR]], align 8
+// OGCG: %[[ARR_ADDR:.*]] = alloca i32, i64 %[[TMP_N]], align 16
+// OGCG: store i64 %[[TMP_N]], ptr %[[VLA_EXPR0]], align 8
+// OGCG: %[[SIZE:.*]] = mul nuw i64 4, %[[TMP_N]]
+// OGCG: store i64 %[[SIZE]], ptr %[[SIZE_ADDR]], align 8
+// OGCG: %[[TMP_SAVED_STACK:.*]] = load ptr, ptr %[[SAVED_STACK_ADDR]], align 8
+// OGCG: call void @llvm.stackrestore.p0(ptr %[[TMP_SAVED_STACK]])

>From a58760aa6e068c2b6eb849c2b38688f5d397c007 Mon Sep 17 00:00:00 2001
From: Amr Hesham <[email protected]>
Date: Tue, 2 Dec 2025 18:05:14 +0100
Subject: [PATCH 2/2] Address code review comments

---
 clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 5 +++--
 clang/test/CIR/CodeGen/size-of-vla.cpp     | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index c8b70c7ba46ae..3e9d3db768bea 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -2353,7 +2353,7 @@ mlir::Value 
ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
 
         // For sizeof and __datasizeof, we need to scale the number of elements
         // by the size of the array element type.
-        auto vlaSize = cgf.getVLASize(vat);
+        CIRGenFunction::VlaSizePair vlaSize = cgf.getVLASize(vat);
         mlir::Value numElts = vlaSize.numElts;
 
         // Scale the number of non-VLA elements by the non-VLA element size.
@@ -2363,7 +2363,8 @@ mlir::Value 
ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
           mlir::Value eltSizeValue =
               builder.getConstAPInt(numElts.getLoc(), numElts.getType(),
                                     cgf.cgm.getSize(eltSize).getValue());
-          return builder.createMul(loc, eltSizeValue, numElts);
+          return builder.createMul(loc, eltSizeValue, numElts,
+                                   cir::OverflowBehavior::NoUnsignedWrap);
         }
 
         return numElts;
diff --git a/clang/test/CIR/CodeGen/size-of-vla.cpp 
b/clang/test/CIR/CodeGen/size-of-vla.cpp
index f87ede006643b..bcaab27781aa3 100644
--- a/clang/test/CIR/CodeGen/size-of-vla.cpp
+++ b/clang/test/CIR/CodeGen/size-of-vla.cpp
@@ -40,14 +40,14 @@ void vla_type_with_element_type_int() {
 // CIR: cir.store {{.*}} %[[CONST_10]], %[[N_ADDR]] : !u64i, !cir.ptr<!u64i>
 // CIR: %3 = cir.load {{.*}} %[[N_ADDR]] : !cir.ptr<!u64i>, !u64i
 // CIR: %[[CONST_4:.*]] = cir.const #cir.int<4> : !u64i
-// CIR: %[[SIZE:.*]] = cir.binop(mul, %[[CONST_4]], %3) : !u64i
+// CIR: %[[SIZE:.*]] = cir.binop(mul, %[[CONST_4]], %3) nuw : !u64i
 // CIR: cir.store {{.*}} %[[SIZE]], %[[SIZE_ADDR]] : !u64i, !cir.ptr<!u64i>
 
 // LLVM: %[[N_ADDR:.*]] = alloca i64, i64 1, align 8
 // LLVM: %[[SIZE_ADDR:.*]] = alloca i64, i64 1, align 8
 // LLVM: store i64 10, ptr %[[N_ADDR]], align 8
 // LLVM: %[[TMP_N:.*]] = load i64, ptr %[[N_ADDR]], align 8
-// LLVM: %[[SIZE:.*]] = mul i64 4, %[[TMP_N]]
+// LLVM: %[[SIZE:.*]] = mul nuw i64 4, %[[TMP_N]]
 // LLVM: store i64 %[[SIZE]], ptr %[[SIZE_ADDR]], align 8
 
 // OGCG: %[[N_ADDR:.*]] = alloca i64, align 8
@@ -120,7 +120,7 @@ void vla_expr_element_type_int() {
 // CIR: %[[ARR_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, %[[TMP_N]] : 
!u64i, ["arr"]
 // CIR: %[[SIZE_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["size", init]
 // CIR: %[[CONST_4:.*]] = cir.const #cir.int<4> : !u64i
-// CIR: %[[SIZE:.*]] = cir.binop(mul, %[[CONST_4]], %[[TMP_N]]) : !u64i
+// CIR: %[[SIZE:.*]] = cir.binop(mul, %[[CONST_4]], %[[TMP_N]]) nuw : !u64i
 // CIR: cir.store {{.*}} %[[SIZE]], %[[SIZE_ADDR]] : !u64i, !cir.ptr<!u64i>
 // CIR: %[[TMP_SAVED_STACK:.*]] = cir.load {{.*}} %[[SAVED_STACK_ADDR]] : 
!cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
 // CIR: cir.stackrestore %[[TMP_SAVED_STACK]] : !cir.ptr<!u8i>
@@ -133,7 +133,7 @@ void vla_expr_element_type_int() {
 // LLVM: store ptr %[[STACK_SAVE]], ptr %[[SAVED_STACK_ADDR]], align 8
 // LLVM: %[[ARR_ADDR:.*]] = alloca i32, i64 %[[TMP_N]], align 16
 // LLVM: %[[SIZE_ADDR:.*]] = alloca i64, i64 1, align 8
-// LLVM: %[[SIZE:.*]] = mul i64 4, %[[TMP_N]]
+// LLVM: %[[SIZE:.*]] = mul nuw i64 4, %[[TMP_N]]
 // LLVM: store i64 %[[SIZE]], ptr %[[SIZE_ADDR]], align 8
 // LLVM: %[[TMP_SAVED_STACK:.*]] = load ptr, ptr %[[SAVED_STACK_ADDR]], align 8
 // LLVM: call void @llvm.stackrestore.p0(ptr %[[TMP_SAVED_STACK]])

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR] Upstream SizeOf for VariableArrayType (PR #169993)

Reply via email to