tyb0807 updated this revision to Diff 404854.
tyb0807 added a comment.

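Add support for `+nomops`: `-march=armv8.8-a+nomops` and `-march=armv9.3-a+nomops` now switch the MOPS feature off again, so `__ARM_FEATURE_MOPS` is not defined. When both `+mops` and `+nomops` are given, the later one wins.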


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118199/new/

https://reviews.llvm.org/D118199

Files:
  clang/lib/Basic/Targets/AArch64.cpp
  clang/test/CodeGen/aarch64-mops.c
  clang/test/Preprocessor/aarch64-target-features.c

Index: clang/test/Preprocessor/aarch64-target-features.c
===================================================================
--- clang/test/Preprocessor/aarch64-target-features.c
+++ clang/test/Preprocessor/aarch64-target-features.c
@@ -510,9 +510,25 @@
 // CHECK-NO-SVE-VECTOR-BITS-NOT: __ARM_FEATURE_SVE_BITS
 // CHECK-NO-SVE-VECTOR-BITS-NOT: __ARM_FEATURE_SVE_VECTOR_OPERATORS
 
-// ================== Check Largse System Extensions (LSE)
+// ================== Check Large System Extensions (LSE)
 // RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+lse -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s
 // RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+lse -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s
 // RUN: %clang -target aarch64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s
 // RUN: %clang -target arm64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s
 // CHECK-LSE: __ARM_FEATURE_ATOMICS 1
+
+// ================== Check Armv8.8-A/Armv9.3-A memcpy and memset acceleration instructions (MOPS)
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a             -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a+mops        -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a             -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a+nomops      -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a+nomops+mops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a+mops        -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a+mops+nomops -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.2-a             -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.2-a+mops        -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.3-a             -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.3-a+nomops      -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.3-a+mops        -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// CHECK-MOPS: __ARM_FEATURE_MOPS 1
+// CHECK-NOMOPS-NOT: __ARM_FEATURE_MOPS 1
Index: clang/test/CodeGen/aarch64-mops.c
===================================================================
--- clang/test/CodeGen/aarch64-mops.c
+++ clang/test/CodeGen/aarch64-mops.c
@@ -1,152 +1,281 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 
-// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -target-feature +mte -S -emit-llvm -o - %s  | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi -target-feature +mops -target-feature +mte -S -emit-llvm -o - %s  | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang_cc1 -triple aarch64-arm-unknown-eabi                                            -S -emit-llvm -o - %s  | FileCheck --check-prefix=CHECK-NOMOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a+mops+memtag -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.7-a             -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a+memtag      -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.8-a             -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.2-a+mops+memtag -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.2-a             -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.3-a+memtag      -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-MOPS %s
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv9.3-a             -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-NOMOPS %s
 
-#define __ARM_FEATURE_MOPS 1
 #include <arm_acle.h>
 #include <stddef.h>
 
-// CHECK-LABEL: @bzero_0(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0)
-// CHECK-NEXT:    ret i8* [[TMP1]]
+// CHECK-MOPS-LABEL: @bzero_0(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 0)
+// CHECK-MOPS-NEXT:    ret i8* [[TMP1]]
+//
+// CHECK-NOMOPS-LABEL: @bzero_0(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef 0, i32 noundef 0)
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP1]]
 //
 void *bzero_0(void *dst) {
   return __arm_mops_memset_tag(dst, 0, 0);
 }
 
-// CHECK-LABEL: @bzero_1(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1)
-// CHECK-NEXT:    ret i8* [[TMP1]]
+// CHECK-MOPS-LABEL: @bzero_1(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 1)
+// CHECK-MOPS-NEXT:    ret i8* [[TMP1]]
+//
+// CHECK-NOMOPS-LABEL: @bzero_1(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef 0, i32 noundef 1)
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP1]]
 //
 void *bzero_1(void *dst) {
   return __arm_mops_memset_tag(dst, 0, 1);
 }
 
-// CHECK-LABEL: @bzero_10(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10)
-// CHECK-NEXT:    ret i8* [[TMP1]]
+// CHECK-MOPS-LABEL: @bzero_10(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10)
+// CHECK-MOPS-NEXT:    ret i8* [[TMP1]]
+//
+// CHECK-NOMOPS-LABEL: @bzero_10(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef 0, i32 noundef 10)
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP1]]
 //
 void *bzero_10(void *dst) {
   return __arm_mops_memset_tag(dst, 0, 10);
 }
 
-// CHECK-LABEL: @bzero_10000(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000)
-// CHECK-NEXT:    ret i8* [[TMP1]]
+// CHECK-MOPS-LABEL: @bzero_10000(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 10000)
+// CHECK-MOPS-NEXT:    ret i8* [[TMP1]]
+//
+// CHECK-NOMOPS-LABEL: @bzero_10000(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef 0, i32 noundef 10000)
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP1]]
 //
 void *bzero_10000(void *dst) {
   return __arm_mops_memset_tag(dst, 0, 10000);
 }
 
-// CHECK-LABEL: @bzero_n(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]])
-// CHECK-NEXT:    ret i8* [[TMP2]]
+// CHECK-MOPS-LABEL: @bzero_n(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP2:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 0, i64 [[TMP1]])
+// CHECK-MOPS-NEXT:    ret i8* [[TMP2]]
+//
+// CHECK-NOMOPS-LABEL: @bzero_n(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i64)*)(i8* noundef [[TMP0]], i32 noundef 0, i64 noundef [[TMP1]])
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP2]]
 //
 void *bzero_n(void *dst, size_t size) {
   return __arm_mops_memset_tag(dst, 0, size);
 }
 
-// CHECK-LABEL: @memset_0(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0)
-// CHECK-NEXT:    ret i8* [[TMP3]]
+// CHECK-MOPS-LABEL: @memset_0(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-MOPS-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 0)
+// CHECK-MOPS-NEXT:    ret i8* [[TMP3]]
+//
+// CHECK-NOMOPS-LABEL: @memset_0(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 0)
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP2]]
 //
 void *memset_0(void *dst, int value) {
   return __arm_mops_memset_tag(dst, value, 0);
 }
 
-// CHECK-LABEL: @memset_1(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1)
-// CHECK-NEXT:    ret i8* [[TMP3]]
+// CHECK-MOPS-LABEL: @memset_1(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-MOPS-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 1)
+// CHECK-MOPS-NEXT:    ret i8* [[TMP3]]
+//
+// CHECK-NOMOPS-LABEL: @memset_1(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 1)
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP2]]
 //
 void *memset_1(void *dst, int value) {
   return __arm_mops_memset_tag(dst, value, 1);
 }
 
-// CHECK-LABEL: @memset_10(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10)
-// CHECK-NEXT:    ret i8* [[TMP3]]
+// CHECK-MOPS-LABEL: @memset_10(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-MOPS-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10)
+// CHECK-MOPS-NEXT:    ret i8* [[TMP3]]
+//
+// CHECK-NOMOPS-LABEL: @memset_10(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 10)
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP2]]
 //
 void *memset_10(void *dst, int value) {
   return __arm_mops_memset_tag(dst, value, 10);
 }
 
-// CHECK-LABEL: @memset_10000(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-// CHECK-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000)
-// CHECK-NEXT:    ret i8* [[TMP3]]
+// CHECK-MOPS-LABEL: @memset_10000(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-MOPS-NEXT:    [[TMP3:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP2]], i64 10000)
+// CHECK-MOPS-NEXT:    ret i8* [[TMP3]]
+//
+// CHECK-NOMOPS-LABEL: @memset_10000(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i32)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 10000)
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP2]]
 //
 void *memset_10000(void *dst, int value) {
   return __arm_mops_memset_tag(dst, value, 10000);
 }
 
-// CHECK-LABEL: @memset_n(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
-// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
-// CHECK-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8
-// CHECK-NEXT:    [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]])
-// CHECK-NEXT:    ret i8* [[TMP4]]
+// CHECK-MOPS-LABEL: @memset_n(
+// CHECK-MOPS:       entry:
+// CHECK-MOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-MOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-MOPS-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-MOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-MOPS-NEXT:    [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-MOPS-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8
+// CHECK-MOPS-NEXT:    [[TMP4:%.*]] = call i8* @llvm.aarch64.mops.memset.tag(i8* [[TMP0]], i8 [[TMP3]], i64 [[TMP2]])
+// CHECK-MOPS-NEXT:    ret i8* [[TMP4]]
+//
+// CHECK-NOMOPS-LABEL: @memset_n(
+// CHECK-NOMOPS:       entry:
+// CHECK-NOMOPS-NEXT:    [[DST_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NOMOPS-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NOMOPS-NEXT:    [[SIZE_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NOMOPS-NEXT:    store i8* [[DST:%.*]], i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    store i32 [[VALUE:%.*]], i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    store i64 [[SIZE:%.*]], i64* [[SIZE_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP0:%.*]] = load i8*, i8** [[DST_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[TMP1:%.*]] = load i32, i32* [[VALUE_ADDR]], align 4
+// CHECK-NOMOPS-NEXT:    [[TMP2:%.*]] = load i64, i64* [[SIZE_ADDR]], align 8
+// CHECK-NOMOPS-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @__arm_mops_memset_tag to i32 (i8*, i32, i64)*)(i8* noundef [[TMP0]], i32 noundef [[TMP1]], i64 noundef [[TMP2]])
+// CHECK-NOMOPS-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
+// CHECK-NOMOPS-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[CONV]] to i8*
+// CHECK-NOMOPS-NEXT:    ret i8* [[TMP3]]
 //
 void *memset_n(void *dst, int value, size_t size) {
   return __arm_mops_memset_tag(dst, value, size);
Index: clang/lib/Basic/Targets/AArch64.cpp
===================================================================
--- clang/lib/Basic/Targets/AArch64.cpp
+++ clang/lib/Basic/Targets/AArch64.cpp
@@ -435,6 +435,9 @@
   if (HasRandGen)
     Builder.defineMacro("__ARM_FEATURE_RNG", "1");
 
+  if (HasMOPS)
+    Builder.defineMacro("__ARM_FEATURE_MOPS", "1");
+
   switch (ArchKind) {
   default:
     break;
@@ -624,16 +627,20 @@
       ArchKind = llvm::AArch64::ArchKind::ARMV8_6A;
     if (Feature == "+v8.7a")
       ArchKind = llvm::AArch64::ArchKind::ARMV8_7A;
-    if (Feature == "+v8.8a")
+    if (Feature == "+v8.8a") {
       ArchKind = llvm::AArch64::ArchKind::ARMV8_8A;
+      HasMOPS = true;
+    }
     if (Feature == "+v9a")
       ArchKind = llvm::AArch64::ArchKind::ARMV9A;
     if (Feature == "+v9.1a")
       ArchKind = llvm::AArch64::ArchKind::ARMV9_1A;
     if (Feature == "+v9.2a")
       ArchKind = llvm::AArch64::ArchKind::ARMV9_2A;
-    if (Feature == "+v9.3a")
+    if (Feature == "+v9.3a") {
       ArchKind = llvm::AArch64::ArchKind::ARMV9_3A;
+      HasMOPS = true;
+    }
     if (Feature == "+v8r")
       ArchKind = llvm::AArch64::ArchKind::ARMV8R;
     if (Feature == "+fullfp16")
@@ -662,6 +669,10 @@
       HasFlagM = true;
     if (Feature == "+hbc")
       HasHBC = true;
+    if (Feature == "+mops")
+      HasMOPS = true;
+    if (Feature == "-mops")
+      HasMOPS = false;
   }
 
   setDataLayout();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to