[gem5-dev] Change in gem5/gem5[develop]: arch-arm: Implementation ARMv8.1 RDMA

Jordi Vaquero (Gerrit) via gem5-dev Wed, 14 Oct 2020 08:04:05 -0700

Jordi Vaquero has uploaded this change for review. (https://gem5-review.googlesource.com/c/public/gem5/+/36015 )


Change subject: arch-arm: Implementation ARMv8.1 RDMA
......................................................................

arch-arm: Implementation ARMv8.1 RDMA

Adding RDMA implementation for ARMv8.1
    + isa/formats/*: Adding decoding of AArch64 and AArch32 instructions
    + isa/insts/neon.isa, neon64.isa: Adding function instructions

Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86
---
M src/arch/arm/isa/formats/aarch64.isa
M src/arch/arm/isa/formats/fp.isa
M src/arch/arm/isa/formats/neon64.isa
M src/arch/arm/isa/insts/neon.isa
M src/arch/arm/isa/insts/neon64.isa
M src/arch/arm/utility.cc
M src/arch/arm/utility.hh
7 files changed, 258 insertions(+), 17 deletions(-)

diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa
index f1a0cdb..4815f4e 100644
--- a/src/arch/arm/isa/formats/aarch64.isa
+++ b/src/arch/arm/isa/formats/aarch64.isa
@@ -2961,7 +2961,8 @@
             } else if (bits(machInst, 23) == 0) {
                 return decodeNeonScShiftByImm(machInst);
             }
-        } else if (bits(machInst, 21) == 1) {
+        } else if (bits(machInst, 21) == 1 || (!bits(machInst, 21) &&
+                                                bits(machInst, 15))) {
             if (bits(machInst, 10) == 1) {
                 return decodeNeonSc3Same(machInst);
             } else if (bits(machInst, 11) == 0) {

diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index f1b387e..ac9ce81 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -652,7 +652,10 @@
             }
           case 0xb:
             if (o1) {
-                if (u || q) {
+                if (u) {
+                    return decodeNeonSThreeSReg<VqrdmlahD, VqrdmlahQ>(
+                            q, size, machInst, vd, vn, vm);
+                } else if (q) {
                     return new Unknown(machInst);
                 } else {
                     return decodeNeonUThreeUSReg<NVpaddD>(
@@ -1504,6 +1507,30 @@
                     return new Unknown(machInst);
                 }
             }
+          case 0xe:
+            if (u) {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlahsQ<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlahsQ<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            } else {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlahsD<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlahsD<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            }
         }
         return new Unknown(machInst);
     }

diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa
index 6c2b2e0..7275ed7 100644
--- a/src/arch/arm/isa/formats/neon64.isa
+++ b/src/arch/arm/isa/formats/neon64.isa
@@ -516,6 +516,24 @@
         IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);

         switch (opcode) {
+          case 0x10:
+            if (size == 0x3 || size == 0x0)
+                return new Unknown64(machInst);
+            if (q)
+                return decodeNeonSThreeHAndWReg<SqrdmlahQX>(
+                    size, machInst, vd, vn, vm);
+            else
+                return decodeNeonSThreeHAndWReg<SqrdmlahDX>(
+                    size, machInst, vd, vn, vm);
+          case 0x11:
+            if (size == 0x3 || size == 0x0)
+                return new Unknown64(machInst);
+            if (q)
+                return decodeNeonSThreeHAndWReg<SqrdmlshQX>(
+                    size, machInst, vd, vn, vm);
+            else
+                return decodeNeonSThreeHAndWReg<SqrdmlshDX>(
+                    size, machInst, vd, vn, vm);
           case 0x18:
           case 0x19:
           case 0x1a:
@@ -1531,10 +1549,20 @@
                 return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX,SqdmulhElemQX>(
                     q, size, machInst, vd, vn, vm, index);
           case 0xd:
-            if (u || (size == 0x0 || size == 0x3))
+            if (size == 0x0 || size == 0x3)
                 return new Unknown64(machInst);
+            else if (u)
+                return decodeNeonSThreeImmHAndWReg<SqrdmlahElemDX,
+                                                   SqrdmlahElemQX>(
+                    q, size, machInst, vd, vn, vm, index);
             else
-                return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,SqrdmulhElemQX>(
+                return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
+                                                   SqrdmulhElemQX>(
+                    q, size, machInst, vd, vn, vm, index);
+          case 0xf:
+            if (size == 0x0 || size == 0x3)
+                return new Unknown64(machInst);
+                return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX,SqrdmlshElemQX>(
                     q, size, machInst, vd, vn, vm, index);
           default:
             return new Unknown64(machInst);
@@ -2033,19 +2061,33 @@
                 return decodeNeonSThreeUReg<SqrshlScX>(
                     size, machInst, vd, vn, vm);
           case 0x10:
-            if (size != 0x3)
-                return new Unknown64(machInst);
-            if (u)
-                return new SubDX<uint64_t>(machInst, vd, vn, vm);
-            else
-                return new AddDX<uint64_t>(machInst, vd, vn, vm);
+            if (!bits(machInst, 10)) {
+                if (s || size == 0x3)
+                    return new Unknown64(machInst);
+                return decodeNeonSThreeHAndWReg<SqrdmlahScX>(
+                    size, machInst, vd, vn, vm);
+            } else {
+                if (size != 0x3)
+                    return new Unknown64(machInst);
+                if (u)
+                    return new SubDX<uint64_t>(machInst, vd, vn, vm);
+                else
+                    return new AddDX<uint64_t>(machInst, vd, vn, vm);
+            }
           case 0x11:
-            if (size != 0x3)
-                return new Unknown64(machInst);
-            if (u)
-                return new CmeqDX<uint64_t>(machInst, vd, vn, vm);
-            else
-                return new CmtstDX<uint64_t>(machInst, vd, vn, vm);
+            if (!bits(machInst, 10)) {
+                if (s || size == 0x3)
+                    return new Unknown64(machInst);
+                return decodeNeonSThreeHAndWReg<SqrdmlshScX>(
+                    size, machInst, vd, vn, vm);
+            } else {
+                if (size != 0x3)
+                    return new Unknown64(machInst);
+                if (u)
+                    return new CmeqDX<uint64_t>(machInst, vd, vn, vm);
+                else
+                    return new CmtstDX<uint64_t>(machInst, vd, vn, vm);
+            }
           case 0x16:
             if (size == 0x3 || size == 0x0)
                 return new Unknown64(machInst);
@@ -2486,9 +2528,17 @@
           case 0xd:
             if (size == 0x0 || size == 0x3)
                 return new Unknown64(machInst);
+            else if (u)
+                return decodeNeonSThreeImmHAndWReg<SqrdmlahElemScX>(
+                    size, machInst, vd, vn, vm, index);
             else
                 return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
                     size, machInst, vd, vn, vm, index);
+          case 0xf:
+            if (size == 0x0 || size == 0x3)
+                return new Unknown64(machInst);
+            return decodeNeonSThreeImmHAndWReg<SqrdmlshElemScX>(
+                                     size, machInst, vd, vn, vm, index);
           default:
             return new Unknown64(machInst);
         }

diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa
index 6290203..9d6a969 100644
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -2783,6 +2783,62 @@
     threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp",smallSignedTypes, 2, vqdmulhCode)
     threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp",smallSignedTypes, 4, vqdmulhCode)

+    vqrdmlahCode = '''
+          if (!HaveQRDMLAHExt(xc->tcBase()))
+            return std::make_shared<UndefinedInstruction>(machInst, true);
+          FPSCR fpscr = (FPSCR) FpscrQc;
+          uint64_t auxElem = (uint64_t)destElem << (sizeof(Element)*8);
+          auxElem += 2 * (srcElem1 * srcElem2);
+          auxElem += ((int64_t)1 << (sizeof(Element) * 8 - 1));
+          destElem = auxElem >> (sizeof(Element) * 8);
+
+          Element maxNeg = std::numeric_limits<Element>::min();
+          Element halfNeg = maxNeg / 2;
+          if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+              (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+              (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+              if (destElem < 0) {
+                destElem = mask(sizeof(Element) * 8 - 1);
+              } else {
+                destElem = std::numeric_limits<Element>::min();
+              }
+              fpscr.qc = 1;
+          }
+          FpscrQc = fpscr;
+    '''
+    threeEqualRegInst("vqrdmlah", "VqrdmlahD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True)
+    threeEqualRegInst("vqrdmlah", "VqrdmlahQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True)
+
+    vqrdmlshCode = '''
+          if (!HaveQRDMLAHExt(xc->tcBase()))
+            return std::make_shared<UndefinedInstruction>(machInst, true);
+          FPSCR fpscr = (FPSCR) FpscrQc;
+          uint64_t auxElem = (uint64_t)destElem << (sizeof(Element)*8);
+          auxElem -= 2 * (srcElem1 * srcElem2);
+          auxElem += ((int64_t)1 << (sizeof(Element) * 8 - 1));
+          destElem = auxElem >> (sizeof(Element) * 8);
+
+          Element maxNeg = std::numeric_limits<Element>::min();
+          Element halfNeg = maxNeg / 2;
+          if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+              (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+              (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+              if (destElem < 0) {
+                destElem = mask(sizeof(Element) * 8 - 1);
+              } else {
+                destElem = std::numeric_limits<Element>::min();
+              }
+              fpscr.qc = 1;
+          }
+          FpscrQc = fpscr;
+    '''
+    threeEqualRegInst("vqrdmlsh", "VqrdmlshD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True)
+    threeEqualRegInst("vqrdmlsh", "VqrdmlshQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True)
+
     vqrdmulhCode = '''
         FPSCR fpscr = (FPSCR) FpscrQc;
         destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
@@ -3033,6 +3089,10 @@
             "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
     twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
             "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
+    twoEqualRegInst("vqrdmlah", "VqrdmlahsD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True)
+    twoEqualRegInst("vqrdmlah", "VqrdmlahsQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True)

     vshrCode = '''
         if (imm >= sizeof(srcElem1) * 8) {

diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa
index f049c3e..2bb3c04 100644
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -2336,7 +2336,94 @@
                      sqnegCode)
     twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
                      sqnegCode, scalar=True)
-    # SQRDMULH (by element)
+    # SQRDMLAH (by element)
+    sqrdmlahCode = '''
+          if (!HaveQRDMLAHExt(xc->tcBase()))
+            return std::make_shared<UndefinedInstruction>(machInst, true);
+
+          FPSCR fpscr = (FPSCR) FpscrQc;
+          uint64_t auxElem = (uint64_t)destElem << (sizeof(Element)*8);
+          auxElem += 2 * (srcElem1 * srcElem2);
+          auxElem += ((int64_t)1 << (sizeof(Element) * 8 - 1));
+          destElem = auxElem >> (sizeof(Element) * 8);
+
+          Element maxNeg = std::numeric_limits<Element>::min();
+          Element halfNeg = maxNeg / 2;
+          if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+              (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+              (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+              if (destElem < 0) {
+                destElem = mask(sizeof(Element) * 8 - 1);
+              } else {
+                destElem = std::numeric_limits<Element>::min();
+              }
+              fpscr.qc = 1;
+          }
+          FpscrQc = fpscr;
+    '''
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahElemDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlahCode,byElem=True,
+                       readDest=True)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahElemQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode,byElem=True,
+                       readDest=True)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahElemScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode,byElem=True,
+                       readDest=True, scalar=True)
+    # SQRDMLAH (vector)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlahCode,
+                       readDest=True)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode,
+                       readDest=True)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode,scalar=True,
+                       readDest=True)
+    # SQRDMLSH (by element)
+    sqrdmlshCode = '''
+          if (!HaveQRDMLAHExt(xc->tcBase()))
+            return std::make_shared<UndefinedInstruction>(machInst, true);
+          FPSCR fpscr = (FPSCR) FpscrQc;
+          uint64_t auxElem = (uint64_t)destElem << (sizeof(Element)*8);
+          auxElem -= 2 * (srcElem1 * srcElem2);
+          auxElem += ((int64_t)1 << (sizeof(Element) * 8 - 1));
+          destElem = auxElem >> (sizeof(Element) * 8);
+
+          Element maxNeg = std::numeric_limits<Element>::min();
+          Element halfNeg = maxNeg / 2;
+          if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+              (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+              (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+              if (destElem < 0) {
+                destElem = mask(sizeof(Element) * 8 - 1);
+              } else {
+                destElem = std::numeric_limits<Element>::min();
+              }
+              fpscr.qc = 1;
+          }
+          FpscrQc = fpscr;
+    '''
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlshCode,byElem=True,
+                       readDest=True)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode,byElem=True,
+                       readDest=True)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode,byElem=True,
+                       readDest=True, scalar=True)
+    # SQRDMLSH (vector)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlshCode,
+                       readDest=True)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode,
+                       readDest=True)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode,scalar=True,
+                       readDest=True)
+    # SQRDMULH (by element)
     sqrdmulhCode = '''
             FPSCR fpscr = (FPSCR) FpscrQc;
             destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc
index 4d866d0..5526055 100644
--- a/src/arch/arm/utility.cc
+++ b/src/arch/arm/utility.cc
@@ -312,6 +312,21 @@
 }

 bool
+HaveQRDMLAHExt(ThreadContext * tc)
+{
+    CPSR cpsr = tc->readMiscReg(MISCREG_CPSR);
+    bool aarch32 = (cpsr.width == 1);
+    if (aarch32) {
+        uint32_t  isar5 = tc->readMiscReg(MISCREG_ID_ISAR5);
+        return bits(isar5, 27, 24) == 0x1;
+
+    } else {
+        AA64ISAR0 id_aa64isar0 = tc->readMiscReg(MISCREG_ID_AA64ISAR0_EL1);
+        return id_aa64isar0.rdm;
+    }
+}
+
+bool
 HavePACExt(ThreadContext *tc)
 {
     AA64ISAR1 id_aa64isar1 = tc->readMiscReg(MISCREG_ID_AA64ISAR1_EL1);
diff --git a/src/arch/arm/utility.hh b/src/arch/arm/utility.hh
index 636625d..5084e52 100644
--- a/src/arch/arm/utility.hh
+++ b/src/arch/arm/utility.hh
@@ -151,6 +151,7 @@
     return opModeToEL((OperatingMode) (uint8_t)cpsr.mode);
 }

+bool HaveQRDMLAHExt(ThreadContext * tc);
 bool HavePACExt(ThreadContext *tc);
 bool HaveVirtHostExt(ThreadContext *tc);
 bool HaveSecureEL2Ext(ThreadContext *tc);

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/36015

To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86
Gerrit-Change-Number: 36015
Gerrit-PatchSet: 1
Gerrit-Owner: Jordi Vaquero <[email protected]>
Gerrit-MessageType: newchange

_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: arch-arm: Implementation ARMv8.1 RDMA

Reply via email to