sdesmalen created this revision.
sdesmalen added reviewers: SjoerdMeijer, efriedma.
Herald added a subscriber: tschuett.
Herald added a project: clang.
sdesmalen added a parent revision: D78674: [SveEmitter] Add builtins for contiguous prefetches.
Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D78677

Files:
  clang/include/clang/Basic/TargetBuiltins.h
  clang/include/clang/Basic/arm_sve.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
  clang/utils/TableGen/SveEmitter.cpp
Index: clang/utils/TableGen/SveEmitter.cpp =================================================================== --- clang/utils/TableGen/SveEmitter.cpp +++ clang/utils/TableGen/SveEmitter.cpp @@ -599,6 +599,12 @@ Float = true; ElementBitwidth = 64; break; + case 'Q': + Constant = true; + Pointer = true; + Void = true; + NumVectors = 0; + break; case 'S': Constant = true; Pointer = true; Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c @@ -116,3 +116,151 @@ // CHECK: @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0) return svprfw_vnum(pg, base, vnum, SV_PLDL1KEEP); } + +void test_svprfw_gather_u32base(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfw_gather_u32base + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfw_gather_u32base(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfw_gather(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfw_gather + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfw_gather(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u64base(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfw_gather_u64base + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void 
@llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfw_gather_u64base(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfw_gather_1(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfw_gather_1 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfw_gather(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfw_gather_s32index(svbool_t pg, const void *base, svint32_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_s32index + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfw_gather_s32index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_index(svbool_t pg, const void *base, svint32_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_index + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfw_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_s64index(svbool_t pg, const void *base, svint64_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_s64index + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // 
CHECK: ret void + return svprfw_gather_s64index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_index_1(svbool_t pg, const void *base, svint64_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_index_1 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfw_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u32index(svbool_t pg, const void *base, svuint32_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_u32index + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfw_gather_u32index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_index_2(svbool_t pg, const void *base, svuint32_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_index_2 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfw_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u64index(svbool_t pg, const void *base, svuint64_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_u64index + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfw_gather_u64index(pg, base, indices, 
SV_PLDL1KEEP); +} + +void test_svprfw_gather_index_3(svbool_t pg, const void *base, svuint64_t indices) +{ + // CHECK-LABEL: test_svprfw_gather_index_3 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfw_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfw_gather_u32base_index + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2 + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfw_gather_u32base_index(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfw_gather_index_4(svbool_t pg, svuint32_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfw_gather_index_4 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2 + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfw_gather_index(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfw_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfw_gather_u64base_index + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2 + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], 
<vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfw_gather_u64base_index(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfw_gather_index_5(svbool_t pg, svuint64_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfw_gather_index_5 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2 + // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfw_gather_index(pg, bases, index, SV_PLDL1KEEP); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c @@ -116,3 +116,151 @@ // CHECK: @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0) return svprfh_vnum(pg, base, vnum, SV_PLDL1KEEP); } + +void test_svprfh_gather_u32base(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfh_gather_u32base + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfh_gather_u32base(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfh_gather(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfh_gather + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfh_gather(pg, bases, SV_PLDL1KEEP); +} + +void 
test_svprfh_gather_u64base(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfh_gather_u64base + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfh_gather_u64base(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfh_gather_1(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfh_gather_1 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfh_gather(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfh_gather_s32index(svbool_t pg, const void *base, svint32_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_s32index + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfh_gather_s32index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_index(svbool_t pg, const void *base, svint32_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_index + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfh_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_s64index(svbool_t pg, const void *base, svint64_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_s64index + // CHECK: 
%[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfh_gather_s64index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_index_1(svbool_t pg, const void *base, svint64_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_index_1 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfh_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_u32index(svbool_t pg, const void *base, svuint32_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_u32index + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfh_gather_u32index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_index_2(svbool_t pg, const void *base, svuint32_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_index_2 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfh_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_u64index(svbool_t pg, const void *base, svuint64_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_u64index + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfh_gather_u64index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_index_3(svbool_t pg, const void *base, svuint64_t indices) +{ + // CHECK-LABEL: test_svprfh_gather_index_3 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfh_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfh_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfh_gather_u32base_index + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1 + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfh_gather_u32base_index(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfh_gather_index_4(svbool_t pg, svuint32_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfh_gather_index_4 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1 + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfh_gather_index(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfh_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index) +{ + // CHECK-LABEL: 
test_svprfh_gather_u64base_index + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1 + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfh_gather_u64base_index(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfh_gather_index_5(svbool_t pg, svuint64_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfh_gather_index_5 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1 + // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfh_gather_index(pg, bases, index, SV_PLDL1KEEP); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c @@ -116,3 +116,151 @@ // CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0) return svprfd_vnum(pg, base, vnum, SV_PLDL1KEEP); } + +void test_svprfd_gather_u32base(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfd_gather_u32base + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfd_gather_u32base(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfd_gather(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfd_gather + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfd_gather(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u64base(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfd_gather_u64base + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfd_gather_u64base(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfd_gather_1(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfd_gather_1 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfd_gather(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfd_gather_s32index(svbool_t pg, const void *base, svint32_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_s32index + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfd_gather_s32index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_index(svbool_t pg, const void *base, svint32_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_index + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> 
%[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfd_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_s64index(svbool_t pg, const void *base, svint64_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_s64index + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfd_gather_s64index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_index_1(svbool_t pg, const void *base, svint64_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_index_1 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfd_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u32index(svbool_t pg, const void *base, svuint32_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_u32index + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: ret void + return svprfd_gather_u32index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_index_2(svbool_t pg, const void *base, svuint32_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_index_2 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0) + // CHECK: 
ret void + return svprfd_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u64index(svbool_t pg, const void *base, svuint64_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_u64index + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfd_gather_u64index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_index_3(svbool_t pg, const void *base, svuint64_t indices) +{ + // CHECK-LABEL: test_svprfd_gather_index_3 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0) + // CHECK: ret void + return svprfd_gather_index(pg, base, indices, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfd_gather_u32base_index + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3 + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfd_gather_u32base_index(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfd_gather_index_4(svbool_t pg, svuint32_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfd_gather_index_4 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3 + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 
x i32> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfd_gather_index(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfd_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfd_gather_u64base_index + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3 + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfd_gather_u64base_index(pg, bases, index, SV_PLDL1KEEP); +} + +void test_svprfd_gather_index_5(svbool_t pg, svuint64_t bases, int64_t index) +{ + // CHECK-LABEL: test_svprfd_gather_index_5 + // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3 + // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0) + // CHECK: ret void + return svprfd_gather_index(pg, bases, index, SV_PLDL1KEEP); +} Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c =================================================================== --- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c +++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c @@ -102,3 +102,147 @@ // CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]], i32 0) return svprfb_vnum(pg, base, vnum, SV_PLDL1KEEP); } + +void test_svprfb_gather_u32base(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfb_gather_u32base + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0) + 
// CHECK: ret void + return svprfb_gather_u32base(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfb_gather(svbool_t pg, svuint32_t bases) +{ + // CHECK-LABEL: test_svprfb_gather + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfb_gather(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfb_gather_u64base(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfb_gather_u64base + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfb_gather_u64base(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfb_gather_1(svbool_t pg, svuint64_t bases) +{ + // CHECK-LABEL: test_svprfb_gather_1 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0) + // CHECK: ret void + return svprfb_gather(pg, bases, SV_PLDL1KEEP); +} + +void test_svprfb_gather_s32offset(svbool_t pg, const void *base, svint32_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_s32offset + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0) + // CHECK: ret void + return svprfb_gather_s32offset(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_offset(svbool_t pg, const void *base, svint32_t offsets) +{ + // CHECK-LABEL: 
test_svprfb_gather_offset + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0) + // CHECK: ret void + return svprfb_gather_offset(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_s64offset(svbool_t pg, const void *base, svint64_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_s64offset + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0) + // CHECK: ret void + return svprfb_gather_s64offset(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_offset_1(svbool_t pg, const void *base, svint64_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_offset_1 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0) + // CHECK: ret void + return svprfb_gather_offset(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_u32offset(svbool_t pg, const void *base, svuint32_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_u32offset + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0) + // CHECK: ret void + return svprfb_gather_u32offset(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_offset_2(svbool_t pg, const void *base, svuint32_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_offset_2 + // CHECK: %[[PG:.*]] = call 
<vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0) + // CHECK: ret void + return svprfb_gather_offset(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_u64offset(svbool_t pg, const void *base, svuint64_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_u64offset + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0) + // CHECK: ret void + return svprfb_gather_u64offset(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_offset_3(svbool_t pg, const void *base, svuint64_t offsets) +{ + // CHECK-LABEL: test_svprfb_gather_offset_3 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0) + // CHECK: ret void + return svprfb_gather_offset(pg, base, offsets, SV_PLDL1KEEP); +} + +void test_svprfb_gather_u32base_offset(svbool_t pg, svuint32_t bases, int64_t offset) +{ + // CHECK-LABEL: test_svprfb_gather_u32base_offset + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset, i32 0) + // CHECK: ret void + return svprfb_gather_u32base_offset(pg, bases, offset, SV_PLDL1KEEP); +} + +void test_svprfb_gather_offset_4(svbool_t pg, svuint32_t bases, int64_t offset) +{ + // CHECK-LABEL: test_svprfb_gather_offset_4 + // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset, i32 0) + // CHECK: ret void + return svprfb_gather_offset(pg, bases, offset, SV_PLDL1KEEP); +} + +void test_svprfb_gather_u64base_offset(svbool_t pg, svuint64_t bases, int64_t offset) +{ + // CHECK-LABEL: test_svprfb_gather_u64base_offset + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset, i32 0) + // CHECK: ret void + return svprfb_gather_u64base_offset(pg, bases, offset, SV_PLDL1KEEP); +} + +void test_svprfb_gather_offset_5(svbool_t pg, svuint64_t bases, int64_t offset) +{ + // CHECK-LABEL: test_svprfb_gather_offset_5 + // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg) + // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset, i32 0) + // CHECK: ret void + return svprfb_gather_offset(pg, bases, offset, SV_PLDL1KEEP); +} Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -3930,6 +3930,9 @@ llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, SmallVectorImpl<llvm::Value *> &Ops, unsigned BuiltinID); + llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, + SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, Index: clang/lib/CodeGen/CGBuiltin.cpp 
=================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -7714,6 +7714,39 @@ return Builder.CreateCall(F, Ops); } +Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned IntID) { + // The gather prefetches are overloaded on the vector input - this can either + // be the vector of base addresses or vector of offsets. + llvm::VectorType *OverloadedTy = dyn_cast<llvm::VectorType>(Ops[1]->getType()); + if (!OverloadedTy) + OverloadedTy = cast<llvm::VectorType>(Ops[2]->getType()); + + // Cast the predicate from svbool_t to the right number of elements. + Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy); + + // vector + imm addressing modes + if (Ops[1]->getType()->isVectorTy()) { + if (Ops.size() == 3) { + // Pass 0 for 'vector+imm' when the index is omitted. + Ops.push_back(ConstantInt::get(Int64Ty, 0)); + + // The sv_prfop is the last operand in the builtin and IR intrinsic. + std::swap(Ops[2], Ops[3]); + } + + // Index needs to be passed as scaled offset. 
+ llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); + unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8; + Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt); + Ops[2] = Builder.CreateMul(Ops[2], Scale); + } + + Function *F = CGM.getIntrinsic(IntID, OverloadedTy); + return Builder.CreateCall(F, Ops); +} + Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, SmallVectorImpl<Value *> &Ops, unsigned BuiltinID) { @@ -7869,6 +7902,8 @@ return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isPrefetch()) return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isGatherPrefetch()) + return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (Builtin->LLVMIntrinsic != 0) { if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) InsertExplicitZeroOperand(Builder, Ty, Ops); Index: clang/include/clang/Basic/arm_sve.td =================================================================== --- clang/include/clang/Basic/arm_sve.td +++ clang/include/clang/Basic/arm_sve.td @@ -95,6 +95,8 @@ // G: pointer to uint32_t // H: pointer to uint64_t +// Q: const pointer to void + // S: const pointer to int8_t // T: const pointer to int16_t // U: const pointer to int32_t @@ -182,6 +184,7 @@ def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand. def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches. 
+def IsGatherPrefetch : FlagType<0x10000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType<int val> { @@ -494,6 +497,39 @@ def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPclJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">; def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPclJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">; +// Prefetch (Vector bases) +def SVPRFB_GATHER_BASES : MInst<"svprfb_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">; +def SVPRFH_GATHER_BASES : MInst<"svprfh_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">; +def SVPRFW_GATHER_BASES : MInst<"svprfw_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">; +def SVPRFD_GATHER_BASES : MInst<"svprfd_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; + +// Prefetch (Scalar base, Vector offsets) +def SVPRFB_GATHER_32B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_sxtw_index">; +def SVPRFH_GATHER_32B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_sxtw_index">; +def SVPRFW_GATHER_32B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_sxtw_index">; +def SVPRFD_GATHER_32B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_sxtw_index">; + +def SVPRFB_GATHER_64B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_index">; +def SVPRFH_GATHER_64B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt16, 
"aarch64_sve_prfh_gather_index">; +def SVPRFW_GATHER_64B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_index">; +def SVPRFD_GATHER_64B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_index">; + +def SVPRFB_GATHER_32B_OFFSETS_U : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_uxtw_index">; +def SVPRFH_GATHER_32B_OFFSETS_U : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_uxtw_index">; +def SVPRFW_GATHER_32B_OFFSETS_U : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_uxtw_index">; +def SVPRFD_GATHER_32B_OFFSETS_U : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_uxtw_index">; + +def SVPRFB_GATHER_64B_OFFSETS_U : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_index">; +def SVPRFH_GATHER_64B_OFFSETS_U : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_index">; +def SVPRFW_GATHER_64B_OFFSETS_U : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_index">; +def SVPRFD_GATHER_64B_OFFSETS_U : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_index">; + +// Prefetch (Vector bases, scalar offset) +def SVPRFB_GATHER_BASES_OFFSET : MInst<"svprfb_gather[_{2}base]_offset", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">; +def SVPRFH_GATHER_BASES_OFFSET : MInst<"svprfh_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">; +def SVPRFW_GATHER_BASES_OFFSET : 
MInst<"svprfw_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">; +def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; + //////////////////////////////////////////////////////////////////////////////// // Integer arithmetic Index: clang/include/clang/Basic/TargetBuiltins.h =================================================================== --- clang/include/clang/Basic/TargetBuiltins.h +++ clang/include/clang/Basic/TargetBuiltins.h @@ -241,6 +241,7 @@ bool isAppendSVALL() const { return Flags & IsAppendSVALL; } bool isInsertOp1SVALL() const { return Flags & IsInsertOp1SVALL; } bool isPrefetch() const { return Flags & IsPrefetch; } + bool isGatherPrefetch() const { return Flags & IsGatherPrefetch; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits