Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (198904 => 198905)
--- trunk/Source/_javascript_Core/ChangeLog 2016-03-31 18:50:55 UTC (rev 198904)
+++ trunk/Source/_javascript_Core/ChangeLog 2016-03-31 18:54:36 UTC (rev 198905)
@@ -1,3 +1,47 @@
+2016-03-31 Benjamin Poulain <benja...@webkit.org>
+
+ [JSC][x86] Add the indexed forms of floating point addition and multiplication
+ https://bugs.webkit.org/show_bug.cgi?id=156058
+
+ Reviewed by Geoffrey Garen.
+
+ B3 supports lowering [base, index] addresses into
+ arbitrary instructions but we were not using that feature.
+
+ This patch adds the missing support for the lowering
+ of Add and Mul.
+
+ * assembler/MacroAssemblerX86Common.h:
+ (JSC::MacroAssemblerX86Common::addDouble):
+ (JSC::MacroAssemblerX86Common::addFloat):
+ (JSC::MacroAssemblerX86Common::mulDouble):
+ (JSC::MacroAssemblerX86Common::mulFloat):
+ * assembler/X86Assembler.h:
+ (JSC::X86Assembler::addsd_mr):
+ (JSC::X86Assembler::vaddsd_mr):
+ (JSC::X86Assembler::addss_mr):
+ (JSC::X86Assembler::vaddss_mr):
+ (JSC::X86Assembler::mulsd_mr):
+ (JSC::X86Assembler::vmulsd_mr):
+ (JSC::X86Assembler::mulss_mr):
+ (JSC::X86Assembler::vmulss_mr):
+ (JSC::X86Assembler::X86InstructionFormatter::SingleInstructionBufferWriter::memoryModRM):
+ * b3/B3LowerToAir.cpp:
+ (JSC::B3::Air::LowerToAir::appendBinOp):
+ Unlike the Addr form, we never need to transform a Tmp
+ into an Index for spilling.
+
+ Instead of duplicating all the code in MacroAssembler, I can
+ just have the lowering phase try using addresses for the first
+ argument when possible.
+
+ * b3/air/AirOpcode.opcodes:
+ * b3/air/testair.cpp:
+ (JSC::B3::Air::testX86VMULSDBaseNeedsRex):
+ (JSC::B3::Air::testX86VMULSDIndexNeedsRex):
+ (JSC::B3::Air::testX86VMULSDBaseIndexNeedRex):
+ (JSC::B3::Air::run):
+
2016-03-31 Saam barati <sbar...@apple.com>
DFG JIT bug in typeof constant folding where the input to typeof is an object or function
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h (198904 => 198905)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h 2016-03-31 18:50:55 UTC (rev 198904)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerX86Common.h 2016-03-31 18:54:36 UTC (rev 198905)
@@ -1134,6 +1134,21 @@
addDouble(op2, op1, dest);
}
+ // dest = op2 + double loaded from [base + index * scale + offset].
+ void addDouble(BaseIndex op1, FPRegisterID op2, FPRegisterID dest)
+ {
+ if (supportsAVX())
+ // Three-operand AVX VADDSD: no aliasing constraint between op2 and dest.
+ m_assembler.vaddsd_mr(op1.offset, op1.base, op1.index, op1.scale, op2, dest);
+ else {
+ ASSERT(isSSE2Present());
+ if (op2 == dest) {
+ // Two-operand SSE2 form accumulates straight from memory into dest.
+ m_assembler.addsd_mr(op1.offset, op1.base, op1.index, op1.scale, dest);
+ return;
+ }
+ // op2 != dest, so loading into dest cannot clobber op2; then add op2 in.
+ loadDouble(op1, dest);
+ addDouble(op2, dest);
+ }
+ }
+
void addFloat(FPRegisterID src, FPRegisterID dest)
{
addFloat(src, dest, dest);
@@ -1180,6 +1195,21 @@
addFloat(op2, op1, dest);
}
+ // dest = op2 + float loaded from [base + index * scale + offset].
+ void addFloat(BaseIndex op1, FPRegisterID op2, FPRegisterID dest)
+ {
+ if (supportsAVX())
+ // Three-operand AVX VADDSS: no aliasing constraint between op2 and dest.
+ m_assembler.vaddss_mr(op1.offset, op1.base, op1.index, op1.scale, op2, dest);
+ else {
+ ASSERT(isSSE2Present());
+ if (op2 == dest) {
+ // Two-operand SSE2 form accumulates straight from memory into dest.
+ m_assembler.addss_mr(op1.offset, op1.base, op1.index, op1.scale, dest);
+ return;
+ }
+ // op2 != dest, so loading into dest cannot clobber op2; then add op2 in.
+ loadFloat(op1, dest);
+ addFloat(op2, dest);
+ }
+ }
+
void divDouble(FPRegisterID src, FPRegisterID dest)
{
ASSERT(isSSE2Present());
@@ -1291,6 +1321,21 @@
return mulDouble(op2, op1, dest);
}
+ // dest = op2 * double loaded from [base + index * scale + offset].
+ void mulDouble(BaseIndex op1, FPRegisterID op2, FPRegisterID dest)
+ {
+ if (supportsAVX())
+ // Three-operand AVX VMULSD: no aliasing constraint between op2 and dest.
+ m_assembler.vmulsd_mr(op1.offset, op1.base, op1.index, op1.scale, op2, dest);
+ else {
+ ASSERT(isSSE2Present());
+ if (op2 == dest) {
+ // Two-operand SSE2 form multiplies straight from memory into dest.
+ m_assembler.mulsd_mr(op1.offset, op1.base, op1.index, op1.scale, dest);
+ return;
+ }
+ // op2 != dest, so loading into dest cannot clobber op2; then multiply by op2.
+ loadDouble(op1, dest);
+ mulDouble(op2, dest);
+ }
+ }
+
void mulFloat(FPRegisterID src, FPRegisterID dest)
{
mulFloat(src, dest, dest);
@@ -1336,6 +1381,21 @@
mulFloat(op2, op1, dest);
}
+ // dest = op2 * float loaded from [base + index * scale + offset].
+ void mulFloat(BaseIndex op1, FPRegisterID op2, FPRegisterID dest)
+ {
+ if (supportsAVX())
+ // Three-operand AVX VMULSS: no aliasing constraint between op2 and dest.
+ m_assembler.vmulss_mr(op1.offset, op1.base, op1.index, op1.scale, op2, dest);
+ else {
+ ASSERT(isSSE2Present());
+ if (op2 == dest) {
+ // Two-operand SSE2 form multiplies straight from memory into dest.
+ m_assembler.mulss_mr(op1.offset, op1.base, op1.index, op1.scale, dest);
+ return;
+ }
+ // op2 != dest, so loading into dest cannot clobber op2; then multiply by op2.
+ loadFloat(op1, dest);
+ mulFloat(op2, dest);
+ }
+ }
+
void andDouble(FPRegisterID src, FPRegisterID dst)
{
// ANDPS is defined on 128bits and is shorter than ANDPD.
Modified: trunk/Source/_javascript_Core/assembler/X86Assembler.h (198904 => 198905)
--- trunk/Source/_javascript_Core/assembler/X86Assembler.h 2016-03-31 18:50:55 UTC (rev 198904)
+++ trunk/Source/_javascript_Core/assembler/X86Assembler.h 2016-03-31 18:54:36 UTC (rev 198905)
@@ -2109,11 +2109,22 @@
m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, base, offset);
}
+ // SSE2 ADDSD dst, [base + index * scale + offset] (scalar double add from memory).
+ void addsd_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
+ {
+ m_formatter.prefix(PRE_SSE_F2);
+ m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, dst, base, index, scale, offset);
+ }
+
void vaddsd_mr(int offset, RegisterID base, XMMRegisterID b, XMMRegisterID dst)
{
m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F2, OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)b, base, offset);
}
+ // AVX VADDSD dst, b, [base + index * scale + offset] (three-operand, VEX-encoded).
+ void vaddsd_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F2, OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)b, offset, base, index, scale);
+ }
+
void addss_rr(XMMRegisterID src, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F3);
@@ -2131,11 +2142,22 @@
m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, base, offset);
}
+ // SSE ADDSS dst, [base + index * scale + offset]. The opcode enum is shared
+ // with the double form (same opcode byte); the F3 prefix selects single precision.
+ void addss_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
+ {
+ m_formatter.prefix(PRE_SSE_F3);
+ m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, dst, base, index, scale, offset);
+ }
+
void vaddss_mr(int offset, RegisterID base, XMMRegisterID b, XMMRegisterID dst)
{
m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F3, OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)b, base, offset);
}
+ // AVX VADDSS dst, b, [base + index * scale + offset] (F3 prefix selects single precision).
+ void vaddss_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F3, OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)b, offset, base, index, scale);
+ }
+
#if !CPU(X86_64)
void addsd_mr(const void* address, XMMRegisterID dst)
{
@@ -2337,11 +2359,22 @@
m_formatter.twoByteOp(OP2_MULSD_VsdWsd, (RegisterID)dst, base, offset);
}
+ // SSE2 MULSD dst, [base + index * scale + offset] (scalar double multiply from memory).
+ void mulsd_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
+ {
+ m_formatter.prefix(PRE_SSE_F2);
+ m_formatter.twoByteOp(OP2_MULSD_VsdWsd, dst, base, index, scale, offset);
+ }
+
void vmulsd_mr(int offset, RegisterID base, XMMRegisterID b, XMMRegisterID dst)
{
m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F2, OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)b, base, offset);
}
+ // AVX VMULSD dst, b, [base + index * scale + offset] (three-operand, VEX-encoded).
+ void vmulsd_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F2, OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)b, offset, base, index, scale);
+ }
+
void mulss_rr(XMMRegisterID src, XMMRegisterID dst)
{
m_formatter.prefix(PRE_SSE_F3);
@@ -2359,11 +2392,22 @@
m_formatter.twoByteOp(OP2_MULSD_VsdWsd, (RegisterID)dst, base, offset);
}
+ // SSE MULSS dst, [base + index * scale + offset]. Opcode enum shared with the
+ // double form; the F3 prefix selects the single-precision variant.
+ void mulss_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
+ {
+ m_formatter.prefix(PRE_SSE_F3);
+ m_formatter.twoByteOp(OP2_MULSD_VsdWsd, dst, base, index, scale, offset);
+ }
+
void vmulss_mr(int offset, RegisterID base, XMMRegisterID b, XMMRegisterID dst)
{
m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F3, OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)b, base, offset);
}
+ // AVX VMULSS dst, b, [base + index * scale + offset] (F3 prefix selects single precision).
+ void vmulss_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID b, XMMRegisterID dst)
+ {
+ m_formatter.vexNdsLigWigTwoByteOp(PRE_SSE_F3, OP2_MULSD_VsdWsd, (RegisterID)dst, (RegisterID)b, offset, base, index, scale);
+ }
+
void pextrw_irr(int whichWord, XMMRegisterID src, RegisterID dst)
{
m_formatter.prefix(PRE_SSE_66);
@@ -3129,6 +3173,21 @@
putByteUnchecked(secondByte);
}
+ // Emit a three-byte VEX prefix for an instruction with SIB addressing:
+ // unlike the base-only overload below, this one also carries the index
+ // register x, whose high bit goes in the VEX.X extension field.
+ ALWAYS_INLINE void threeBytesVexNds(OneByteOpcodeID simdPrefix, VexImpliedBytes impliedBytes, RegisterID r, RegisterID inOpReg, RegisterID x, RegisterID b)
+ {
+ putByteUnchecked(VexPrefix::ThreeBytes);
+
+ // Second byte: implied-opcode-map bits, then the INVERTED REX-style
+ // extension bits R (modRM reg), X (SIB index), B (SIB base).
+ uint8_t secondByte = static_cast<uint8_t>(impliedBytes);
+ secondByte |= !regRequiresRex(r) << 7;
+ secondByte |= !regRequiresRex(x) << 6;
+ secondByte |= !regRequiresRex(b) << 5;
+ putByteUnchecked(secondByte);
+
+ // Third byte: pp (SIMD prefix) bits plus vvvv = one's complement of the
+ // extra source register. W and L are left 0, matching the LIG/WIG callers.
+ uint8_t thirdByte = vexEncodeSimdPrefix(simdPrefix);
+ thirdByte |= (~inOpReg & 0xf) << 3;
+ putByteUnchecked(thirdByte);
+ }
+
ALWAYS_INLINE void threeBytesVexNds(OneByteOpcodeID simdPrefix, VexImpliedBytes impliedBytes, RegisterID r, RegisterID inOpReg, RegisterID b)
{
putByteUnchecked(VexPrefix::ThreeBytes);
@@ -3307,6 +3366,17 @@
writer.memoryModRM(dest, base, offset);
}
+ // VEX-encoded two-byte-opcode instruction with a [base + index * scale + offset]
+ // memory operand. The compact two-byte VEX form has no X/B extension bits,
+ // so it is only usable when neither base nor index needs a REX extension;
+ // otherwise we must emit the three-byte form.
+ void vexNdsLigWigTwoByteOp(OneByteOpcodeID simdPrefix, TwoByteOpcodeID opcode, RegisterID dest, RegisterID a, int offset, RegisterID base, RegisterID index, int scale)
+ {
+ SingleInstructionBufferWriter writer(m_buffer);
+ if (regRequiresRex(base, index))
+ // Careful with the argument order: x (index) precedes b (base).
+ writer.threeBytesVexNds(simdPrefix, VexImpliedBytes::TwoBytesOp, dest, a, index, base);
+ else
+ // NOTE(review): assumes twoBytesVex encodes dest's high bit (VEX.R)
+ // for xmm8-15 destinations — confirm against the two-byte emitter.
+ writer.twoBytesVex(simdPrefix, a, dest);
+ writer.putByteUnchecked(opcode);
+ writer.memoryModRM(dest, base, index, scale, offset);
+ }
+
void threeByteOp(TwoByteOpcodeID twoBytePrefix, ThreeByteOpcodeID opcode)
{
SingleInstructionBufferWriter writer(m_buffer);
Modified: trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp (198904 => 198905)
--- trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp 2016-03-31 18:50:55 UTC (rev 198904)
+++ trunk/Source/_javascript_Core/b3/B3LowerToAir.cpp 2016-03-31 18:54:36 UTC (rev 198905)
@@ -734,6 +734,14 @@
append(opcode, tmp(left), rightAddr.consume(*this), result);
return;
}
+
+ if (commutativity == Commutative) {
+ if (isValidForm(opcode, rightAddr.kind(), Arg::Tmp, Arg::Tmp)) {
+ append(opcode, rightAddr.consume(*this), tmp(left), result);
+ return;
+ }
+ }
+
if (isValidForm(opcode, rightAddr.kind(), Arg::Tmp)) {
append(relaxedMoveForType(m_value->type()), tmp(left), result);
append(opcode, rightAddr.consume(*this), result);
Modified: trunk/Source/_javascript_Core/b3/air/AirOpcode.opcodes (198904 => 198905)
--- trunk/Source/_javascript_Core/b3/air/AirOpcode.opcodes 2016-03-31 18:50:55 UTC (rev 198904)
+++ trunk/Source/_javascript_Core/b3/air/AirOpcode.opcodes 2016-03-31 18:54:36 UTC (rev 198905)
@@ -147,6 +147,7 @@
Tmp, Tmp, Tmp
x86: Addr, Tmp, Tmp
x86: Tmp, Addr, Tmp
+ x86: Index, Tmp, Tmp
x86: AddDouble U:F:64, UD:F:64
Tmp, Tmp
@@ -156,6 +157,7 @@
Tmp, Tmp, Tmp
x86: Addr, Tmp, Tmp
x86: Tmp, Addr, Tmp
+ x86: Index, Tmp, Tmp
x86: AddFloat U:F:32, UD:F:32
Tmp, Tmp
@@ -243,6 +245,7 @@
Tmp, Tmp, Tmp
x86: Addr, Tmp, Tmp
x86: Tmp, Addr, Tmp
+ x86: Index, Tmp, Tmp
x86: MulDouble U:F:64, UD:F:64
Tmp, Tmp
@@ -252,6 +255,7 @@
Tmp, Tmp, Tmp
x86: Addr, Tmp, Tmp
x86: Tmp, Addr, Tmp
+ x86: Index, Tmp, Tmp
x86: MulFloat U:F:32, UD:F:32
Tmp, Tmp
Modified: trunk/Source/_javascript_Core/b3/air/testair.cpp (198904 => 198905)
--- trunk/Source/_javascript_Core/b3/air/testair.cpp 2016-03-31 18:50:55 UTC (rev 198904)
+++ trunk/Source/_javascript_Core/b3/air/testair.cpp 2016-03-31 18:54:36 UTC (rev 198905)
@@ -1776,6 +1776,55 @@
CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 1, pureNaN()) == 2.4 * 4.2);
}
+// Pins the VEX encoding of indexed VMULSD when only the BASE register (r13)
+// needs a REX extension bit, forcing the three-byte VEX form via VEX.B.
+void testX86VMULSDBaseNeedsRex()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
+ root->append(MulDouble, nullptr, Arg::index(Tmp(X86Registers::r13), Tmp(GPRInfo::argumentGPR1)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ double secondArg = 4.2;
+ // Byte-granular index: base = &secondArg - 1 double, so base + 8 bytes
+ // lands back on secondArg.
+ uint64_t index = 8;
+ CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 1, index, pureNaN()) == 2.4 * 4.2);
+}
+
+// Pins the VEX encoding of indexed VMULSD when only the INDEX register (r13)
+// needs a REX extension bit, forcing the three-byte VEX form via VEX.X.
+void testX86VMULSDIndexNeedsRex()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR1), Tmp(X86Registers::r13));
+ root->append(MulDouble, nullptr, Arg::index(Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ double secondArg = 4.2;
+ // Negative byte offset expressed through unsigned wraparound: base points
+ // one double PAST secondArg, and index = (uint64_t)-8 brings it back.
+ uint64_t index = - 8;
+ CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 1, index, pureNaN()) == 2.4 * 4.2);
+}
+
+// Pins the VEX encoding of indexed VMULSD when BOTH base (r12) and index (r13)
+// need REX extension bits (VEX.B and VEX.X together). r12 is a deliberate
+// choice: as a base it also exercises x86's SIB-encoding special case.
+void testX86VMULSDBaseIndexNeedRex()
+{
+ B3::Procedure proc;
+ Code& code = proc.code();
+
+ BasicBlock* root = code.addBlock();
+ root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r12));
+ root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR1), Tmp(X86Registers::r13));
+ root->append(MulDouble, nullptr, Arg::index(Tmp(X86Registers::r12), Tmp(X86Registers::r13)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
+ root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
+ root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+ double secondArg = 4.2;
+ // base = &secondArg - 2 doubles (16 bytes below); index = +16 bytes lands on secondArg.
+ uint64_t index = 16;
+ CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 2, index, pureNaN()) == 2.4 * 4.2);
+}
+
#endif
#define RUN(test) do { \
@@ -1850,6 +1899,10 @@
RUN(testX86VMULSDDestRexAddr());
RUN(testX86VMULSDRegOpDestRexAddr());
RUN(testX86VMULSDAddrOpDestRexAddr());
+
+ RUN(testX86VMULSDBaseNeedsRex());
+ RUN(testX86VMULSDIndexNeedsRex());
+ RUN(testX86VMULSDBaseIndexNeedRex());
#endif
if (tasks.isEmpty())