Jordi Vaquero has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/36015 )
Change subject: arch-arm: Implementation ARMv8.1 RDMA
......................................................................
arch-arm: Implementation ARMv8.1 RDMA
Adding RDMA implementation for ARMv8.1
+ isa/formats/*: Adding decoding of AArch64 and AArch32 instructions
+ isa/insts/neon.isa, neon64.isa: Adding instruction implementations
Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36015
Reviewed-by: Giacomo Travaglini <giacomo.travagl...@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travagl...@arm.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/arch/arm/ArmISA.py
M src/arch/arm/isa/formats/aarch64.isa
M src/arch/arm/isa/formats/fp.isa
M src/arch/arm/isa/formats/neon64.isa
M src/arch/arm/isa/insts/neon.isa
M src/arch/arm/isa/insts/neon64.isa
6 files changed, 268 insertions(+), 23 deletions(-)
Approvals:
Giacomo Travaglini: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index 02f24d3..3e18665 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -80,7 +80,7 @@
id_isar2 = Param.UInt32(0x21232141, "Instruction Set Attribute
Register 2")
id_isar3 = Param.UInt32(0x01112131, "Instruction Set Attribute
Register 3")
id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute
Register 4")
- id_isar5 = Param.UInt32(0x10000000, "Instruction Set Attribute
Register 5")
+ id_isar5 = Param.UInt32(0x11000000, "Instruction Set Attribute
Register 5")
fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register")
@@ -98,8 +98,8 @@
id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000,
"AArch64 Debug Feature Register 1")
- # !TME | !Atomic | !CRC32 | !SHA2 | !SHA1 | !AES
- id_aa64isar0_el1 = Param.UInt64(0x0000000000000000,
+ # RDM | !TME | !Atomic | !CRC32 | !SHA2 | !SHA1 | !AES
+ id_aa64isar0_el1 = Param.UInt64(0x0000000010000000,
"AArch64 Instruction Set Attribute Register 0")
# GPI = 0x0 | GPA = 0x1 | API=0x0 | FCMA | JSCVT | APA=0x1
diff --git a/src/arch/arm/isa/formats/aarch64.isa
b/src/arch/arm/isa/formats/aarch64.isa
index f1a0cdb..9a487ea 100644
--- a/src/arch/arm/isa/formats/aarch64.isa
+++ b/src/arch/arm/isa/formats/aarch64.isa
@@ -2975,6 +2975,8 @@
} else {
return new Unknown64(machInst);
}
+ } else if (bits(machInst, 15) && bits(machInst, 10) == 1) {
+ return decodeNeonSc3SameExtra(machInst);
} else if (bits(machInst, 23, 22) == 0 &&
bits(machInst, 15) == 0) {
if (bits(machInst, 10) == 1) {
diff --git a/src/arch/arm/isa/formats/fp.isa
b/src/arch/arm/isa/formats/fp.isa
index f1b387e..5e7880e 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -652,7 +652,10 @@
}
case 0xb:
if (o1) {
- if (u || q) {
+ if (u) {
+ return decodeNeonSThreeSReg<VqrdmlahD, VqrdmlahQ>(
+ q, size, machInst, vd, vn, vm);
+ } else if (q) {
return new Unknown(machInst);
} else {
return decodeNeonUThreeUSReg<NVpaddD>(
@@ -669,7 +672,10 @@
}
case 0xc:
if (o1) {
- if (!u) {
+ if (u) {
+ return decodeNeonSThreeSReg<VqrdmlshD, VqrdmlshQ>(
+ q, size, machInst, vd, vn, vm);
+ } else {
if (bits(size, 1) == 0) {
if (q) {
return new NVfmaQFp<float>(machInst, vd, vn,
vm);
@@ -1504,6 +1510,54 @@
return new Unknown(machInst);
}
}
+ case 0xe:
+ if (u) {
+ switch (size) {
+ case 1:
+ return new VqrdmlahsQ<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqrdmlahsQ<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new VqrdmlahsD<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqrdmlahsD<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
+ case 0xf:
+ if (u) {
+ switch (size) {
+ case 1:
+ return new VqrdmlshsQ<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqrdmlshsQ<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ return new VqrdmlshsD<int16_t>(
+ machInst, vd, vn, vm, index);
+ case 2:
+ return new VqrdmlshsD<int32_t>(
+ machInst, vd, vn, vm, index);
+ default:
+ return new Unknown(machInst);
+ }
+ }
}
return new Unknown(machInst);
}
diff --git a/src/arch/arm/isa/formats/neon64.isa
b/src/arch/arm/isa/formats/neon64.isa
index 6c2b2e0..835909a 100644
--- a/src/arch/arm/isa/formats/neon64.isa
+++ b/src/arch/arm/isa/formats/neon64.isa
@@ -66,6 +66,8 @@
// AdvSIMD scalar three same
inline StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst);
+ // AdvSIMD scalar three same extra
+ inline StaticInstPtr decodeNeonSc3SameExtra(ExtMachInst machInst);
// AdvSIMD scalar three different
inline StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst);
// AdvSIMD scalar two-reg misc
@@ -516,6 +518,20 @@
IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
switch (opcode) {
+ case 0x10:
+ if (q)
+ return decodeNeonSThreeHAndWReg<SqrdmlahQX>(
+ size, machInst, vd, vn, vm);
+ else
+ return decodeNeonSThreeHAndWReg<SqrdmlahDX>(
+ size, machInst, vd, vn, vm);
+ case 0x11:
+ if (q)
+ return decodeNeonSThreeHAndWReg<SqrdmlshQX>(
+ size, machInst, vd, vn, vm);
+ else
+ return decodeNeonSThreeHAndWReg<SqrdmlshDX>(
+ size, machInst, vd, vn, vm);
case 0x18:
case 0x19:
case 0x1a:
@@ -1531,10 +1547,16 @@
return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX,
SqdmulhElemQX>(
q, size, machInst, vd, vn, vm, index);
case 0xd:
- if (u || (size == 0x0 || size == 0x3))
- return new Unknown64(machInst);
+ if (u)
+ return decodeNeonSThreeImmHAndWReg<SqrdmlahElemDX,
+ SqrdmlahElemQX>(
+ q, size, machInst, vd, vn, vm, index);
else
- return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
SqrdmulhElemQX>(
+ return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
+ SqrdmulhElemQX>(
+ q, size, machInst, vd, vn, vm, index);
+ case 0xf:
+ return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX,
SqrdmlshElemQX>(
q, size, machInst, vd, vn, vm, index);
default:
return new Unknown64(machInst);
@@ -2106,6 +2128,28 @@
}
StaticInstPtr
+ decodeNeonSc3SameExtra(ExtMachInst machInst)
+ {
+ uint8_t size = bits(machInst, 23, 22);
+ uint8_t opcode = bits(machInst, 15, 11);
+
+ IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+ IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+ IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
+
+ switch (opcode) {
+ case 0x10:
+ return decodeNeonSThreeHAndWReg<SqrdmlahScX>(
+ size, machInst, vd, vn, vm);
+ case 0x11:
+ return decodeNeonSThreeHAndWReg<SqrdmlshScX>(
+ size, machInst, vd, vn, vm);
+ default:
+ return new Unknown64(machInst);
+ }
+ }
+
+ StaticInstPtr
decodeNeonSc3Diff(ExtMachInst machInst)
{
if (bits(machInst, 29))
@@ -2434,10 +2478,9 @@
}
IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf);
- if (u && opcode != 9)
- return new Unknown64(machInst);
+ uint8_t u_opcode = opcode | u << 4;
- switch (opcode) {
+ switch (u_opcode) {
case 0x1:
if (size < 2 || sz_L == 0x3)
return new Unknown64(machInst);
@@ -2465,11 +2508,7 @@
case 0x9:
if (size < 2 || sz_L == 0x3)
return new Unknown64(machInst);
- if (u)
- return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
- size & 0x1, machInst, vd, vn, vm_fp, index_fp);
- else
- return decodeNeonUThreeImmScFpReg<FmulElemScX>(
+ return decodeNeonUThreeImmScFpReg<FmulElemScX>(
size & 0x1, machInst, vd, vn, vm_fp, index_fp);
case 0xb:
if (size == 0x0 || size == 0x3)
@@ -2484,10 +2523,20 @@
return decodeNeonSThreeImmHAndWReg<SqdmulhElemScX>(
size, machInst, vd, vn, vm, index);
case 0xd:
- if (size == 0x0 || size == 0x3)
+ return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
+ size, machInst, vd, vn, vm, index);
+ case 0x19:
+ if (size < 2 || sz_L == 0x3)
return new Unknown64(machInst);
- else
- return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
+ return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
+ size & 0x1, machInst, vd, vn, vm_fp, index_fp);
+
+ case 0x1d:
+ return decodeNeonSThreeImmHAndWReg<SqrdmlahElemScX>(
+ size, machInst, vd, vn, vm, index);
+
+ case 0x1f:
+ return decodeNeonSThreeImmHAndWReg<SqrdmlshElemScX>(
size, machInst, vd, vn, vm, index);
default:
return new Unknown64(machInst);
diff --git a/src/arch/arm/isa/insts/neon.isa
b/src/arch/arm/isa/insts/neon.isa
index 6290203..2d25f36 100644
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -1147,7 +1147,7 @@
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
readDest=False, pairwise=False, byElem=False,
- standardFpcsr=False, complex=False):
+ standardFpcsr=False, complex=False, extra=''):
global header_output, exec_output
eWalkCode = simdEnabledCheckCode + '''
RegVect srcReg1, destReg;
@@ -1203,6 +1203,7 @@
}
''' % { "op" : op, "readDest" : readDestCode }
else:
+ eWalkCode += extra
eWalkCode += '''
for (unsigned i = 0; i < eCount; i++) {
Element srcElem1 = letoh(srcReg1.elements[i]);
@@ -1398,7 +1399,8 @@
threeUnequalRegInst(name, Name, opClass, types, op,
True, False, True, readDest)
- def twoEqualRegInst(name, Name, opClass, types, rCount, op,
readDest=False):
+ def twoEqualRegInst(name, Name, opClass, types, rCount, op,
+ readDest=False, extra=''):
global header_output, exec_output
eWalkCode = simdEnabledCheckCode + '''
RegVect srcReg1, srcReg2, destReg;
@@ -1415,6 +1417,7 @@
readDestCode = ''
if readDest:
readDestCode = 'destElem = letoh(destReg.elements[i]);'
+ eWalkCode += extra
eWalkCode += '''
if (imm >= eCount) {
return std::make_shared<UndefinedInstruction>(machInst, false,
@@ -2783,6 +2786,55 @@
threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp",
smallSignedTypes, 2, vqdmulhCode)
threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp",
smallSignedTypes, 4, vqdmulhCode)
+
+ vqrdmCode = '''
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ int nbits = sizeof(Element)*8;
+
+ auto val_max = std::numeric_limits<Element>::max();
+ auto val_min = std::numeric_limits<Element>::min();
+ BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
+ ((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
+ ((BigElement)1 << (nbits - 1));
+ unsat_value >>= nbits;
+
+ if (unsat_value > val_max) {
+ fpscr.qc = 1;
+ destElem = val_max;
+ } else if (unsat_value < val_min) {
+ fpscr.qc = 1;
+ destElem = val_min;
+ } else {
+ destElem = unsat_value;
+ }
+ FpscrQc = fpscr;
+ '''
+ code_add = "+"
+ vqrdmlahCode = vqrdmCode % {'code': code_add}
+ rdm_check = '''
+ int sz = bits(machInst, 21, 20);
+ RegVal isar5 = xc->tcBase()->readMiscReg(MISCREG_ID_ISAR5);
+ if (!(bits(isar5, 27, 24) == 0x1) || sz == 3 || sz == 0)
+ return std::make_shared<UndefinedInstruction>(machInst, true);
+ typedef __int128_t BigElement;
+ '''
+ threeEqualRegInst("vqrdmlah", "VqrdmlahD",
+ "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
+ extra=rdm_check)
+ threeEqualRegInst("vqrdmlah", "VqrdmlahQ",
+ "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
+ extra=rdm_check)
+
+ code_sub = "-"
+ vqrdmlshCode = vqrdmCode % {'code': code_sub}
+ threeEqualRegInst("vqrdmlsh", "VqrdmlshD",
+ "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
+ extra=rdm_check)
+ threeEqualRegInst("vqrdmlsh", "VqrdmlshQ",
+ "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
+ extra=rdm_check)
+
+
vqrdmulhCode = '''
FPSCR fpscr = (FPSCR) FpscrQc;
destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
@@ -3033,6 +3085,18 @@
"SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
"SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
+ twoEqualRegInst("vqrdmlah", "VqrdmlahsD",
+ "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
+ extra=rdm_check)
+ twoEqualRegInst("vqrdmlah", "VqrdmlahsQ",
+ "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
+ extra=rdm_check)
+ twoEqualRegInst("vqrdmlsh", "VqrdmlshsD",
+ "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
+ extra=rdm_check)
+ twoEqualRegInst("vqrdmlsh", "VqrdmlshsQ",
+ "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
+ extra=rdm_check)
vshrCode = '''
if (imm >= sizeof(srcElem1) * 8) {
diff --git a/src/arch/arm/isa/insts/neon64.isa
b/src/arch/arm/isa/insts/neon64.isa
index f049c3e..36db474 100644
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -52,7 +52,8 @@
def threeEqualRegInstX(name, Name, opClass, types, rCount, op,
readDest=False, pairwise=False, scalar=False,
- byElem=False, decoder='Generic', complex=False):
+ byElem=False, decoder='Generic', complex=False,
+ extra=''):
assert (not pairwise) or ((not byElem) and (not scalar))
global header_output, exec_output, decoders
eWalkCode = simd64EnabledCheckCode + '''
@@ -110,6 +111,7 @@
continue;
}
'''
+ eWalkCode += extra
eWalkCode += '''
for (unsigned i = 0; i < eCount; i++) {
%(scalarCheck)s
@@ -2336,7 +2338,81 @@
sqnegCode)
twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
sqnegCode, scalar=True)
- # SQRDMULH (by element)
+ sqrdmCode = '''
+
+ FPSCR fpscr = (FPSCR) FpscrQc;
+ int nbits = sizeof(Element)*8;
+
+ auto val_max = std::numeric_limits<Element>::max();
+ auto val_min = std::numeric_limits<Element>::min();
+ BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
+ ((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
+ ((BigElement)1 << (nbits - 1));
+ unsat_value >>= nbits;
+
+ if (unsat_value > val_max) {
+ fpscr.qc = 1;
+ destElem = val_max;
+ } else if (unsat_value < val_min) {
+ fpscr.qc = 1;
+ destElem = val_min;
+ } else {
+ destElem = unsat_value;
+ }
+ FpscrQc = fpscr;
+ '''
+ code_add = "+"
+ sqrdmlahCode = sqrdmCode % {'code': code_add}
+ rdm_check = '''
+ int sz = bits(machInst, 23, 22);
+ AA64ISAR0 isar0 = xc->tcBase()->readMiscReg(
MISCREG_ID_AA64ISAR0_EL1);
+ if (!isar0.rdm || sz == 3 || sz == 0)
+ return std::make_shared<UndefinedInstruction>(machInst, true);
+ typedef __int128_t BigElement;
+ '''
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahElemDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmlahCode,
byElem=True,
+ readDest=True, extra=rdm_check)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahElemQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlahCode,
byElem=True,
+ readDest=True, extra=rdm_check)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahElemScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlahCode,
byElem=True,
+ readDest=True, scalar=True, extra=rdm_check)
+ # SQRDMLAH (vector)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmlahCode,
+ readDest=True, extra=rdm_check)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlahCode,
+ readDest=True, extra=rdm_check)
+ threeEqualRegInstX("sqrdmlah", "SqrdmlahScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlahCode,
scalar=True,
+ readDest=True, extra=rdm_check)
+ # SQRDMLSH (by element)
+ code_sub = "-"
+ sqrdmlshCode = sqrdmCode % {'code': code_sub}
+
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmlshCode,
byElem=True,
+ readDest=True, extra=rdm_check)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlshCode,
byElem=True,
+ readDest=True, extra=rdm_check)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlshCode,
byElem=True,
+ readDest=True, scalar=True, extra=rdm_check)
+ # SQRDMLSH (vector)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshDX", "SimdMultOp",
+ ("int16_t", "int32_t"), 2, sqrdmlshCode,
+ readDest=True, extra=rdm_check)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshQX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlshCode,
+ readDest=True, extra=rdm_check)
+ threeEqualRegInstX("sqrdmlsh", "SqrdmlshScX", "SimdMultOp",
+ ("int16_t", "int32_t"), 4, sqrdmlshCode,
scalar=True,
+ readDest=True, extra=rdm_check)
+ # SQRDMULH (by element)
sqrdmulhCode = '''
FPSCR fpscr = (FPSCR) FpscrQc;
destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/36015
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86
Gerrit-Change-Number: 36015
Gerrit-PatchSet: 11
Gerrit-Owner: Jordi Vaquero <jordi.vaqu...@metempsy.com>
Gerrit-Reviewer: Andreas Sandberg <andreas.sandb...@arm.com>
Gerrit-Reviewer: Giacomo Travaglini <giacomo.travagl...@arm.com>
Gerrit-Reviewer: Jordi Vaquero <jordi.vaqu...@metempsy.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s