Ciro Santilli has uploaded this change for review.
( https://gem5-review.googlesource.com/c/public/gem5/+/16788 )
Change subject: arch-arm: implement floating point aarch32 VCVTA family
......................................................................
arch-arm: implement floating point aarch32 VCVTA family
These instructions (VCVTA, VCVTN, VCVTP and VCVTM) convert floating
point to integer using the rounding mode selected by the instruction,
and were added to aarch32 as an extension to ARMv7.
Change-Id: I62d1705badc95a4e8954a5ad62b2b6bc9e4ffe00
---
M src/arch/arm/isa/formats/fp.isa
M src/arch/arm/isa/insts/fp.isa
2 files changed, 200 insertions(+), 105 deletions(-)
diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index 77a33e6..102b5a7 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010-2011, 2016-2018 ARM Limited
+// Copyright (c) 2010-2011, 2016-2019 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -2001,6 +2001,26 @@
decodeShortFpTransfer(ExtMachInst machInst);
'''
decoder_output = '''
+ IntRegIndex decodeFpVd(ExtMachInst machInst, uint32_t size, bool isInt)
+ {
+ if (!isInt and size == 3) {
+ return (IntRegIndex)((bits(machInst, 22) << 5) |
+ (bits(machInst, 15, 12) << 1));
+ } else {
+ return (IntRegIndex)(bits(machInst, 22) |
+ (bits(machInst, 15, 12) << 1));
+ }
+ }
+ IntRegIndex decodeFpVm(ExtMachInst machInst, uint32_t size, bool isInt)
+ {
+ if (!isInt and size == 3) {
+ return (IntRegIndex)((bits(machInst, 5) << 5) |
+ (bits(machInst, 3, 0) << 1));
+ } else {
+ return (IntRegIndex)(bits(machInst, 5) |
+ (bits(machInst, 3, 0) << 1));
+ }
+ }
StaticInstPtr
decodeShortFpTransfer(ExtMachInst machInst)
{
@@ -2008,67 +2028,143 @@
const uint32_t c = bits(machInst, 8);
const uint32_t a = bits(machInst, 23, 21);
const uint32_t b = bits(machInst, 6, 5);
+ const uint32_t o1 = bits(machInst, 18);
if ((machInst.thumb == 1 && bits(machInst, 28) == 1) ||
(machInst.thumb == 0 && machInst.condCode == 0xf)) {
// Determine if this is backported aarch64 FP instruction
const bool b31_b24 = bits(machInst, 31, 24) == 0xFE;
const bool b23 = bits(machInst, 23);
- const bool b21_b18 = bits(machInst, 21, 18) == 0xE;
+ const bool b21_b19 = bits(machInst, 21, 19) == 0x7;
const bool b11_b9 = bits(machInst, 11, 9) == 0x5;
- const bool sz = bits(machInst, 8);
- const bool b7_b6 = bits(machInst, 7, 6) == 0x1;
- const bool b6 = bits(machInst, 6) == 0x0;
+ const uint32_t size = bits(machInst, 9, 8);
+ const bool op3 = bits(machInst, 6);
const bool b4 = bits(machInst, 4) == 0x0;
- if (b31_b24 && b23 && b21_b18 && b11_b9 && b7_b6 && b4) {
- // VINT* Integer Rounding Instructon
- const uint32_t rm = bits(machInst, 17, 16);
-
- if (sz) {
- const IntRegIndex vd =
- (IntRegIndex)((bits(machInst, 22) << 5) |
- (bits(machInst, 15, 12) << 1));
- const IntRegIndex vm =
- (IntRegIndex)((bits(machInst, 5) << 5) |
- (bits(machInst, 3, 0) << 1));
- switch(rm) {
- case 0x0:
- return decodeVfpRegRegOp<VRIntAD>(machInst, vd, vm,
- true);
- case 0x1:
- return decodeVfpRegRegOp<VRIntND>(machInst, vd, vm,
- true);
- case 0x2:
- return decodeVfpRegRegOp<VRIntPD>(machInst, vd, vm,
- true);
- case 0x3:
- return decodeVfpRegRegOp<VRIntMD>(machInst, vd, vm,
- true);
- default: return new Unknown(machInst);
- }
- } else {
- const IntRegIndex vd =
- (IntRegIndex)(bits(machInst, 22) |
- (bits(machInst, 15, 12) << 1));
- const IntRegIndex vm =
- (IntRegIndex)(bits(machInst, 5) |
- (bits(machInst, 3, 0) << 1));
- switch(rm) {
- case 0x0:
- return decodeVfpRegRegOp<VRIntAS>(machInst, vd, vm,
- false);
- case 0x1:
- return decodeVfpRegRegOp<VRIntNS>(machInst, vd, vm,
- false);
- case 0x2:
- return decodeVfpRegRegOp<VRIntPS>(machInst, vd, vm,
- false);
- case 0x3:
- return decodeVfpRegRegOp<VRIntMS>(machInst, vd, vm,
- false);
- default: return new Unknown(machInst);
- }
- }
- } else if (b31_b24 && !b23 && b11_b9 && b6 && b4){
+ const uint32_t rm = bits(machInst, 17, 16);
+ IntRegIndex vd = decodeFpVd(machInst, size, false);
+ IntRegIndex vm = decodeFpVm(machInst, size, false);
+ IntRegIndex vdInt = decodeFpVd(machInst, size, true);
+ if (b31_b24 && b23 && b21_b19 && b11_b9 && op3 && b4) {
+ if (o1 == 0) {
+ // VINT* Integer Rounding Instruction
+ if (size == 3) {
+ switch(rm) {
+ case 0x0:
+ return decodeVfpRegRegOp<VRIntAD>(machInst, vd, vm,
+ true);
+ case 0x1:
+ return decodeVfpRegRegOp<VRIntND>(machInst, vd, vm,
+ true);
+ case 0x2:
+ return decodeVfpRegRegOp<VRIntPD>(machInst, vd, vm,
+ true);
+ case 0x3:
+ return decodeVfpRegRegOp<VRIntMD>(machInst, vd, vm,
+ true);
+ default: return new Unknown(machInst);
+ }
+ } else {
+ switch(rm) {
+ case 0x0:
+ return decodeVfpRegRegOp<VRIntAS>(machInst, vd, vm,
+ false);
+ case 0x1:
+ return decodeVfpRegRegOp<VRIntNS>(machInst, vd, vm,
+ false);
+ case 0x2:
+ return decodeVfpRegRegOp<VRIntPS>(machInst, vd, vm,
+ false);
+ case 0x3:
+ return decodeVfpRegRegOp<VRIntMS>(machInst, vd, vm,
+ false);
+ default: return new Unknown(machInst);
+ }
+ }
+ } else {
+ const bool op = bits(machInst, 7);
+ switch(rm) {
+ case 0x0:
+ switch(size) {
+ case 0x0:
+ return new Unknown(machInst);
+ case 0x1:
+ return new FailUnimplemented(
+ "VCVTA.f16", machInst);
+ case 0x2:
+ if (op) {
+ return new VcvtaFpSIntS(machInst, vdInt, vm);
+ } else {
+ return new VcvtaFpUIntS(machInst, vdInt, vm);
+ }
+ case 0x3:
+ if (op) {
+ return new VcvtaFpSIntD(machInst, vdInt, vm);
+ } else {
+ return new VcvtaFpUIntD(machInst, vdInt, vm);
+ }
+ }
+ case 0x1:
+ switch(size) {
+ case 0x0:
+ return new Unknown(machInst);
+ case 0x1:
+ return new FailUnimplemented(
+ "VCVTN.u32.f16", machInst);
+ case 0x2:
+ if (op) {
+ return new VcvtnFpSIntS(machInst, vdInt, vm);
+ } else {
+ return new VcvtnFpUIntS(machInst, vdInt, vm);
+ }
+ case 0x3:
+ if (op) {
+ return new VcvtnFpSIntD(machInst, vdInt, vm);
+ } else {
+ return new VcvtnFpUIntD(machInst, vdInt, vm);
+ }
+ }
+ case 0x2:
+ switch(size) {
+ case 0x0:
+ return new Unknown(machInst);
+ case 0x1:
+ return new FailUnimplemented(
+ "VCVTP.u32.f16", machInst);
+ case 0x2:
+ if (op) {
+ return new VcvtpFpSIntS(machInst, vdInt, vm);
+ } else {
+ return new VcvtpFpUIntS(machInst, vdInt, vm);
+ }
+ case 0x3:
+ if (op) {
+ return new VcvtpFpSIntD(machInst, vdInt, vm);
+ } else {
+ return new VcvtpFpUIntD(machInst, vdInt, vm);
+ }
+ }
+ case 0x3:
+ switch(size) {
+ case 0x0:
+ return new Unknown(machInst);
+ case 0x1:
+ return new FailUnimplemented(
+ "VCVTM.u32.f16", machInst);
+ case 0x2:
+ if (op) {
+ return new VcvtmFpSIntS(machInst, vdInt, vm);
+ } else {
+ return new VcvtmFpUIntS(machInst, vdInt, vm);
+ }
+ case 0x3:
+ if (op) {
+ return new VcvtmFpSIntD(machInst, vdInt, vm);
+ } else {
+ return new VcvtmFpUIntD(machInst, vdInt, vm);
+ }
+ }
+ }
+ }
+ } else if (b31_b24 && !b23 && b11_b9 && !op3 && b4){
// VSEL* floating point conditional select
ConditionCode cond;
@@ -2079,24 +2175,12 @@
case 0x3: cond = COND_GT; break;
}
- if (sz) {
- const IntRegIndex vd =
- (IntRegIndex)((bits(machInst, 22) << 5) |
- (bits(machInst, 15, 12) << 1));
- const IntRegIndex vm =
- (IntRegIndex)((bits(machInst, 5) << 5) |
- (bits(machInst, 3, 0) << 1));
+ if (size == 3) {
const IntRegIndex vn =
(IntRegIndex)((bits(machInst, 7) << 5) |
(bits(machInst, 19, 16) << 1));
return new VselD(machInst, vd, vn, vm, cond);
} else {
- const IntRegIndex vd =
- (IntRegIndex)(bits(machInst, 22) |
- (bits(machInst, 15, 12) << 1));
- const IntRegIndex vm =
- (IntRegIndex)(bits(machInst, 5) |
- (bits(machInst, 3, 0) << 1));
const IntRegIndex vn =
(IntRegIndex)((bits(machInst, 19, 16) << 1) |
bits(machInst, 7));
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa
index dcf5889..d8323c4 100644
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
-// Copyright (c) 2010-2013,2016 ARM Limited
+// Copyright (c) 2010-2013,2016,2018-2019 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -993,85 +993,96 @@
decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntDRIop);
exec_output += PredOpExecute.subst(vcvtFpSIntDRIop);
- vcvtFpUIntSCode = vfpEnabledCheckCode + '''
+ round_mode_suffix_to_mode = {
+ '': 'VfpRoundZero',
+ 'a': 'VfpRoundAway',
+ 'm': 'VfpRoundDown',
+ 'n': 'VfpRoundNearest',
+ 'p': 'VfpRoundUpward',
+ }
+
+ def buildVcvt(code, className, roundModeSuffix):
+ global header_output, decoder_output, exec_output, \
+ vfpEnabledCheckCode, round_mode_suffix_to_mode
+ full_code = vfpEnabledCheckCode + code.format(
+ round_mode=round_mode_suffix_to_mode[roundModeSuffix],
+ )
+ iop = InstObjParams(
+ "vcvt{}".format(roundModeSuffix),
+ className.format(roundModeSuffix),
+ "FpRegRegOp",
+ { "code": full_code,
+ "predicate_test": predicateTest,
+ "op_class": "SimdFloatCvtOp" },
+ []
+ )
+ header_output += FpRegRegOpDeclare.subst(iop);
+ decoder_output += FpRegRegOpConstructor.subst(iop);
+ exec_output += PredOpExecute.subst(iop);
+
+ code = '''
FPSCR fpscr = (FPSCR) FpscrExc;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0);
+ FpDest_uw = vfpFpToFixed<float>(
+ FpOp1, false, 32, 0, true, {round_mode});
__asm__ __volatile__("" :: "m" (FpDest_uw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
'''
- vcvtFpUIntSIop = InstObjParams("vcvt", "VcvtFpUIntS", "FpRegRegOp",
- { "code": vcvtFpUIntSCode,
- "predicate_test": predicateTest,
- "op_class": "SimdFloatCvtOp" }, [])
- header_output += FpRegRegOpDeclare.subst(vcvtFpUIntSIop);
- decoder_output += FpRegRegOpConstructor.subst(vcvtFpUIntSIop);
- exec_output += PredOpExecute.subst(vcvtFpUIntSIop);
+ for round_mode_suffix in round_mode_suffix_to_mode:
+ buildVcvt(code, "Vcvt{}FpUIntS", round_mode_suffix)
- vcvtFpUIntDCode = vfpEnabledCheckCode + '''
+ code = '''
FPSCR fpscr = (FPSCR) FpscrExc;
double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw);
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0);
+ uint64_t result = vfpFpToFixed<double>(
+ cOp1, false, 32, 0, true, {round_mode});
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = result;
FpscrExc = fpscr;
'''
- vcvtFpUIntDIop = InstObjParams("vcvt", "VcvtFpUIntD", "FpRegRegOp",
- { "code": vcvtFpUIntDCode,
- "predicate_test": predicateTest,
- "op_class": "SimdFloatCvtOp" }, [])
- header_output += FpRegRegOpDeclare.subst(vcvtFpUIntDIop);
- decoder_output += FpRegRegOpConstructor.subst(vcvtFpUIntDIop);
- exec_output += PredOpExecute.subst(vcvtFpUIntDIop);
+ for round_mode_suffix in round_mode_suffix_to_mode:
+ buildVcvt(code, "Vcvt{}FpUIntD", round_mode_suffix)
- vcvtFpSIntSCode = vfpEnabledCheckCode + '''
+ code = '''
FPSCR fpscr = (FPSCR) FpscrExc;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0);
+ FpDest_sw = vfpFpToFixed<float>(
+ FpOp1, true, 32, 0, true, {round_mode});
__asm__ __volatile__("" :: "m" (FpDest_sw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
'''
- vcvtFpSIntSIop = InstObjParams("vcvt", "VcvtFpSIntS", "FpRegRegOp",
- { "code": vcvtFpSIntSCode,
- "predicate_test": predicateTest,
- "op_class": "SimdFloatCvtOp" }, [])
- header_output += FpRegRegOpDeclare.subst(vcvtFpSIntSIop);
- decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntSIop);
- exec_output += PredOpExecute.subst(vcvtFpSIntSIop);
+ for round_mode_suffix in round_mode_suffix_to_mode:
+ buildVcvt(code, "Vcvt{}FpSIntS", round_mode_suffix)
- vcvtFpSIntDCode = vfpEnabledCheckCode + '''
+ code = '''
FPSCR fpscr = (FPSCR) FpscrExc;
double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw);
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
- int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0);
+ int64_t result = vfpFpToFixed<double>(
+ cOp1, true, 32, 0, true, {round_mode});
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = result;
FpscrExc = fpscr;
'''
- vcvtFpSIntDIop = InstObjParams("vcvt", "VcvtFpSIntD", "FpRegRegOp",
- { "code": vcvtFpSIntDCode,
- "predicate_test": predicateTest,
- "op_class": "SimdFloatCvtOp" }, [])
- header_output += FpRegRegOpDeclare.subst(vcvtFpSIntDIop);
- decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntDIop);
- exec_output += PredOpExecute.subst(vcvtFpSIntDIop);
+ for round_mode_suffix in round_mode_suffix_to_mode:
+ buildVcvt(code, "Vcvt{}FpSIntD", round_mode_suffix)
vcvtFpSFpDCode = vfpEnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/16788
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I62d1705badc95a4e8954a5ad62b2b6bc9e4ffe00
Gerrit-Change-Number: 16788
Gerrit-PatchSet: 1
Gerrit-Owner: Ciro Santilli <ciro.santi...@arm.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
gem5-dev@gem5.org
http://m5sim.org/mailman/listinfo/gem5-dev