Jordi Vaquero has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/36015 )
Change subject: arch-arm: Implementation ARMv8.1 RDMA
......................................................................
arch-arm: Implementation ARMv8.1 RDMA
Adding RDMA implementation for ARMv8.1
+ isa/formats/*: Adding decoding of AArch64 and AArch32 instructions
+ isa/insts/neon.isa, neon64.isa: Adding instruction definitions
Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86
---
M src/arch/arm/isa/formats/aarch64.isa
M src/arch/arm/isa/formats/fp.isa
M src/arch/arm/isa/formats/neon64.isa
M src/arch/arm/isa/insts/neon.isa
M src/arch/arm/isa/insts/neon64.isa
M src/arch/arm/utility.cc
M src/arch/arm/utility.hh
7 files changed, 258 insertions(+), 17 deletions(-)
diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa
index f1a0cdb..4815f4e 100644
--- a/src/arch/arm/isa/formats/aarch64.isa
+++ b/src/arch/arm/isa/formats/aarch64.isa
@@ -2961,7 +2961,8 @@
} else if (bits(machInst, 23) == 0) {
return decodeNeonScShiftByImm(machInst);
}
- } else if (bits(machInst, 21) == 1) {
+ } else if (bits(machInst, 21) == 1 || (!bits(machInst, 21) &&
+ bits(machInst, 15))) {
if (bits(machInst, 10) == 1) {
return decodeNeonSc3Same(machInst);
} else if (bits(machInst, 11) == 0) {
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index f1b387e..ac9ce81 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -652,7 +652,10 @@
}
case 0xb:
if (o1) {
- if (u || q) {
+ if (u) {
+ return decodeNeonSThreeSReg<VqrdmlahD, VqrdmlahQ>(
+ q, size, machInst, vd, vn, vm);
+ } else if (q) {
return new Unknown(machInst);
} else {
return decodeNeonUThreeUSReg<NVpaddD>(
@@ -1504,6 +1507,30 @@
return new Unknown(machInst);
}
}
+ case 0xe:
+ if (u) {
+ switch (size) {
+ case 1:
+ return new VqrdmlahsQ<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqrdmlahsQ<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new VqrdmlahsD<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqrdmlahsD<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
}
return new Unknown(machInst);
}
diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa
index 6c2b2e0..7275ed7 100644
--- a/src/arch/arm/isa/formats/neon64.isa
+++ b/src/arch/arm/isa/formats/neon64.isa
@@ -516,6 +516,24 @@
IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
switch (opcode) {
+ case 0x10:
+ if (size == 0x3 || size == 0x0)
+ return new Unknown64(machInst);
+ if (q)
+ return decodeNeonSThreeHAndWReg<SqrdmlahQX>(
+ size, machInst, vd, vn, vm);
+ else
+ return decodeNeonSThreeHAndWReg<SqrdmlahDX>(
+ size, machInst, vd, vn, vm);
+ case 0x11:
+ if (size == 0x3 || size == 0x0)
+ return new Unknown64(machInst);
+ if (q)
+ return decodeNeonSThreeHAndWReg<SqrdmlshQX>(
+ size, machInst, vd, vn, vm);
+ else
+ return decodeNeonSThreeHAndWReg<SqrdmlshDX>(
+ size, machInst, vd, vn, vm);
case 0x18:
case 0x19:
case 0x1a:
@@ -1531,10 +1549,20 @@
return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX,
SqdmulhElemQX>(
q, size, machInst, vd, vn, vm, index);
case 0xd:
- if (u || (size == 0x0 || size == 0x3))
+ if (size == 0x0 || size == 0x3)
return new Unknown64(machInst);
+ else if (u)
+ return decodeNeonSThreeImmHAndWReg<SqrdmlahElemDX,
+ SqrdmlahElemQX>(
+ q, size, machInst, vd, vn, vm, index);
else
- return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
SqrdmulhElemQX>(
+ return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
+ SqrdmulhElemQX>(
+ q, size, machInst, vd, vn, vm, index);
+ case 0xf:
+ if (size == 0x0 || size == 0x3)
+ return new Unknown64(machInst);
+ return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX,
SqrdmlshElemQX>(
q, size, machInst, vd, vn, vm, index);
default:
return new Unknown64(machInst);
@@ -2033,19 +2061,33 @@
return decodeNeonSThreeUReg<SqrshlScX>(
size, machInst, vd, vn, vm);
case 0x10:
- if (size != 0x3)
- return new Unknown64(machInst);
- if (u)
- return new SubDX<uint64_t>(machInst, vd, vn, vm);
- else
- return new AddDX<uint64_t>(machInst, vd, vn, vm);
+ if (!bits(machInst, 10)) {
+ if (s || size == 0x3)
+ return new Unknown64(machInst);
+ return decodeNeonSThreeHAndWReg<SqrdmlahScX>(
+ size, machInst, vd, vn, vm);
+ } else {
+ if (size != 0x3)
+ return new Unknown64(machInst);
+ if (u)
+ return new SubDX<uint64_t>(machInst, vd, vn, vm);
+ else
+ return new AddDX<uint64_t>(machInst, vd, vn, vm);
+ }
case 0x11:
- if (size != 0x3)
- return new Unknown64(machInst);
- if (u)
- return new CmeqDX<uint64_t>(machInst, vd, vn, vm);
- else
- return new CmtstDX<uint64_t>(machInst, vd, vn, vm);
+ if (!bits(machInst, 10)) {
+ if (s || size == 0x3)
+ return new Unknown64(machInst);
+ return decodeNeonSThreeHAndWReg<SqrdmlshScX>(
+ size, machInst, vd, vn, vm);
+ } else {
+ if (size != 0x3)
+ return new Unknown64(machInst);
+ if (u)
+ return new CmeqDX<uint64_t>(machInst, vd, vn, vm);
+ else
+ return new CmtstDX<uint64_t>(machInst, vd, vn, vm);
+ }
case 0x16:
if (size == 0x3 || size == 0x0)
return new Unknown64(machInst);
@@ -2486,9 +2528,17 @@
case 0xd:
if (size == 0x0 || size == 0x3)
return new Unknown64(machInst);
+ else if (u)
+ return decodeNeonSThreeImmHAndWReg<SqrdmlahElemScX>(
+ size, machInst, vd, vn, vm, index);
else
return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
size, machInst, vd, vn, vm, index);
+ case 0xf:
+ if (size == 0x0 || size == 0x3)
+ return new Unknown64(machInst);
+ return decodeNeonSThreeImmHAndWReg<SqrdmlshElemScX>(
+ size, machInst, vd, vn, vm, index);
default:
return new Unknown64(machInst);
}
diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa
index 6290203..9d6a969 100644
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -2783,6 +2783,62 @@
threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp",
smallSignedTypes, 2, vqdmulhCode)
threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp",
smallSignedTypes, 4, vqdmulhCode)
+ vqrdmlahCode = '''
+ if (!HaveQRDMLAHExt(xc->tcBase()))
+ return std::make_shared<UndefinedInstruction>(machInst, true);
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ uint64_t auxElem = (uint64_t)destElem << (sizeof(Element)*8);
+ auxElem += 2 * (srcElem1 * srcElem2);
+ auxElem += ((int64_t)1 << (sizeof(Element) * 8 - 1));
+ destElem = auxElem >> (sizeof(Element) * 8);
+
+ Element maxNeg = std::numeric_limits<Element>::min();
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ if (destElem < 0) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ } else {
+ destElem = std::numeric_limits<Element>::min();
+ }
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInst("vqrdmlah", "VqrdmlahD",
+ "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True)
+ threeEqualRegInst("vqrdmlah", "VqrdmlahQ",
+ "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True)
+
+ vqrdmlshCode = '''
+ if (!HaveQRDMLAHExt(xc->tcBase()))
+ return std::make_shared<UndefinedInstruction>(machInst, true);
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ uint64_t auxElem = (uint64_t)destElem << (sizeof(Element)*8);
+ auxElem -= 2 * (srcElem1 * srcElem2);
+ auxElem += ((int64_t)1 << (sizeof(Element) * 8 - 1));
+ destElem = auxElem >> (sizeof(Element) * 8);
+
+ Element maxNeg = std::numeric_limits<Element>::min();
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ if (destElem < 0) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ } else {
+ destElem = std::numeric_limits<Element>::min();
+ }
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInst("vqrdmlsh", "VqrdmlshD",
+ "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True)
+ threeEqualRegInst("vqrdmlsh", "VqrdmlshQ",
+ "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True)
+
vqrdmulhCode = '''
FPSCR fpscr = (FPSCR) FpscrQc;
destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
@@ -3033,6 +3089,10 @@
"SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
"SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
+ twoEqualRegInst("vqrdmlah", "VqrdmlahsD",
+ "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True)
+ twoEqualRegInst("vqrdmlah", "VqrdmlahsQ",
+ "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True)
vshrCode = '''
if (imm >= sizeof(srcElem1) * 8) {
diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa
index f049c3e..2bb3c04 100644
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -2336,7 +2336,94 @@
sqnegCode)
twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
sqnegCode, scalar=True)
- # SQRDMULH (by element)
+ # SQRDMLAH (by element)
+ sqrdmlahCode = '''
+ if (!HaveQRDMLAHExt(xc->tcBase()))
+ return std::make_shared<UndefinedInstruction>(machInst, true);
+
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ uint64_t auxElem = (uint64_t)destElem << (sizeof(Element)*8);
+ auxElem += 2 * (srcElem1 * srcElem2);
+ auxElem += ((int64_t)1 << (sizeof(Element) * 8 - 1));
+ destElem = auxElem >> (sizeof(Element) * 8);
+
+ Element maxNeg = std::numeric_limits<Element>::min();
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ if (destElem < 0) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ } else {
+ destElem = std::numeric_limits<Element>::min();
+ }
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahElemDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmlahCode,
byElem=True,
+ readDest=True)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahElemQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlahCode,
byElem=True,
+ readDest=True)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahElemScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlahCode,
byElem=True,
+ readDest=True, scalar=True)
+ # SQRDMLAH (vector)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmlahCode,
+ readDest=True)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlahCode,
+ readDest=True)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlahCode,
scalar=True,
+ readDest=True)
+ # SQRDMLSH (by element)
+ sqrdmlshCode = '''
+ if (!HaveQRDMLAHExt(xc->tcBase()))
+ return std::make_shared<UndefinedInstruction>(machInst, true);
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ uint64_t auxElem = (uint64_t)destElem << (sizeof(Element)*8);
+ auxElem -= 2 * (srcElem1 * srcElem2);
+ auxElem += ((int64_t)1 << (sizeof(Element) * 8 - 1));
+ destElem = auxElem >> (sizeof(Element) * 8);
+
+ Element maxNeg = std::numeric_limits<Element>::min();
+ Element halfNeg = maxNeg / 2;
+ if ((srcElem1 == maxNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == halfNeg && srcElem2 == maxNeg) ||
+ (srcElem1 == maxNeg && srcElem2 == halfNeg)) {
+ if (destElem < 0) {
+ destElem = mask(sizeof(Element) * 8 - 1);
+ } else {
+ destElem = std::numeric_limits<Element>::min();
+ }
+ fpscr.qc = 1;
+ }
+ FpscrQc = fpscr;
+ '''
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmlshCode,
byElem=True,
+ readDest=True)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlshCode,
byElem=True,
+ readDest=True)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlshCode,
byElem=True,
+ readDest=True, scalar=True)
+ # SQRDMLSH (vector)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmlshCode,
+ readDest=True)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlshCode,
+ readDest=True)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlshCode,
scalar=True,
+ readDest=True)
+ # SQRDMULH (by element)
sqrdmulhCode = '''
FPSCR fpscr = (FPSCR) FpscrQc;
destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc
index 4d866d0..5526055 100644
--- a/src/arch/arm/utility.cc
+++ b/src/arch/arm/utility.cc
@@ -312,6 +312,21 @@
}
bool
+HaveQRDMLAHExt(ThreadContext * tc)
+{
+ CPSR cpsr = tc->readMiscReg(MISCREG_CPSR);
+ bool aarch32 = (cpsr.width == 1);
+ if (aarch32) {
+ uint32_t isar5 = tc->readMiscReg(MISCREG_ID_ISAR5);
+ return bits(isar5, 27, 24) == 0x1;
+
+ } else {
+ AA64ISAR0 id_aa64isar0 = tc->readMiscReg(MISCREG_ID_AA64ISAR0_EL1);
+ return id_aa64isar0.rdm;
+ }
+}
+
+bool
HavePACExt(ThreadContext *tc)
{
AA64ISAR1 id_aa64isar1 = tc->readMiscReg(MISCREG_ID_AA64ISAR1_EL1);
diff --git a/src/arch/arm/utility.hh b/src/arch/arm/utility.hh
index 636625d..5084e52 100644
--- a/src/arch/arm/utility.hh
+++ b/src/arch/arm/utility.hh
@@ -151,6 +151,7 @@
return opModeToEL((OperatingMode) (uint8_t)cpsr.mode);
}
+bool HaveQRDMLAHExt(ThreadContext * tc);
bool HavePACExt(ThreadContext *tc);
bool HaveVirtHostExt(ThreadContext *tc);
bool HaveSecureEL2Ext(ThreadContext *tc);
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/36015
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86
Gerrit-Change-Number: 36015
Gerrit-PatchSet: 1
Gerrit-Owner: Jordi Vaquero <jordi.vaqu...@metempsy.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s