Diff
Modified: trunk/JSTests/ChangeLog (223690 => 223691)
--- trunk/JSTests/ChangeLog 2017-10-19 16:10:47 UTC (rev 223690)
+++ trunk/JSTests/ChangeLog 2017-10-19 16:27:44 UTC (rev 223691)
@@ -1,3 +1,28 @@
+2017-10-19 Robin Morisset <rmoris...@apple.com>
+
+ Turn recursive tail calls into loops
+ https://bugs.webkit.org/show_bug.cgi?id=176601
+
+ Reviewed by Saam Barati.
+
+ Add some simple tests that compute factorial in several ways, and other trivial computations.
+ They all test the case where foo calls bar (in an inlineable way) that then does a tail call.
+ Depending on the nature of both calls, it is possible or not to turn the tail call into a loop.
+ I have no clear way of checking that the call was indeed transformed, but I can check that the code computes the right result
+ (which it doesn't if that tail call is transformed into a loop in the unsound cases).
+
+ * stress/inline-call-to-recursive-tail-call.js: Added.
+ (factorial.aux):
+ (factorial):
+ (factorial2.aux):
+ (factorial2.id):
+ (factorial2):
+ (factorial3.aux):
+ (factorial3):
+ (aux):
+ (factorial4):
+ (test):
+
2017-10-18 Mark Lam <mark....@apple.com>
RegExpObject::defineOwnProperty() does not need to compare values if no descriptor value is specified.
Added: trunk/JSTests/stress/inline-call-to-recursive-tail-call.js (0 => 223691)
--- trunk/JSTests/stress/inline-call-to-recursive-tail-call.js (rev 0)
+++ trunk/JSTests/stress/inline-call-to-recursive-tail-call.js 2017-10-19 16:27:44 UTC (rev 223691)
@@ -0,0 +1,94 @@
+"use strict";
+
+// factorial calls aux that tail-calls back into factorial.
+// It is not OK to turn that tail call into a jump back to the top of the function, because the call to aux is not a tail call.
+function factorial(n) {
+ function aux(n) {
+ if (n == 1)
+ return 1;
+ return factorial(n - 1);
+ }
+
+ return n * aux(n);
+}
+
+// Same, but this time with an irrelevant tail call in factorial.
+// This exercises a different code path because the recursive tail call optimization aborts early if the op_enter block is not split, and it is split by the detection of a tail call.
+function factorial2(n) {
+ function aux2(n) {
+ if (n == 1)
+ return 1;
+ return factorial2(n - 1);
+ }
+ function id(n) {
+ return n;
+ }
+
+ return id(n * aux2(n));
+}
+
+// This time, aux is tail-calling itself: the optimization is valid but must jump to the start of aux3, not of factorial3.
+function factorial3(n) {
+ function aux3(n, acc) {
+ if (n == 1)
+ return acc;
+ return aux3 (n-1, n*acc);
+ }
+
+ return n * aux3(n-1, 1);
+}
+
+// In this case, it is valid to jump all the way to the top of factorial4, because all calls are tail calls.
+function factorial4(n, acc) {
+ function aux4(n, acc) {
+ if (n == 1)
+ return acc;
+ return factorial4(n-1, n*acc);
+ }
+
+ if (acc)
+ return aux4(n, acc);
+ return aux4(n, 1);
+}
+
+// This function is used to test that recursing with a different number of arguments doesn't mess up with the stack.
+// The first two tail calls should be optimized, but not the last one (no place on the stack for the third argument).
+function foo(a, b) {
+ if (a == 0)
+ return 42;
+ if (a == 1)
+ return foo(a - 1);
+ if (a == 2)
+ return foo(b - 1, a);
+ return foo (b - 1, a, 43);
+}
+
+// Same deal as foo, just with an inlining thrown into the mix.
+// Even the first tail call should not be optimized in this case, for subtle reasons.
+function bar(x, y) {
+ function auxBar(a, b) {
+ if (a == 0)
+ return 42;
+ if (a == 1)
+ return foo(a - 1);
+ if (a == 2)
+ return foo(b - 1, a);
+ return foo (b - 1, a, 43);
+ }
+
+ return auxBar(x, y);
+}
+
+function test(result, expected, name) {
+ if (result != expected)
+ throw "Wrong result for " + name + ": " + result + " instead of " + expected;
+}
+
+for (var i = 0; i < 10000; ++i) {
+ test(factorial(20), 2432902008176640000, "factorial");
+ test(factorial2(20), 2432902008176640000, "factorial2");
+ test(factorial3(20), 2432902008176640000, "factorial3");
+ test(factorial4(20), 2432902008176640000, "factorial4");
+ test(foo(10, 10), 42, "foo");
+ test(bar(10, 10), 42, "bar");
+}
Modified: trunk/Source/_javascript_Core/ChangeLog (223690 => 223691)
--- trunk/Source/_javascript_Core/ChangeLog 2017-10-19 16:10:47 UTC (rev 223690)
+++ trunk/Source/_javascript_Core/ChangeLog 2017-10-19 16:27:44 UTC (rev 223691)
@@ -1,3 +1,41 @@
+2017-10-19 Robin Morisset <rmoris...@apple.com>
+
+ Turn recursive tail calls into loops
+ https://bugs.webkit.org/show_bug.cgi?id=176601
+
+ Reviewed by Saam Barati.
+
+ We want to turn recursive tail calls into loops early in the pipeline, so that the loops can then be optimized.
+ One difficulty is that we need to split the entry block of the function we are jumping to in order to have somewhere to jump to.
+ Worse: it is not necessarily the first block of the codeBlock, because of inlining! So we must do the splitting in the DFGByteCodeParser, at the same time as inlining.
+ We do this part through modifying the computation of the jump targets.
+ Importantly, we only do this splitting for functions that have tail calls.
+ It is the only case where the optimisation is sound, and doing the splitting unconditionally destroys performance on Octane/raytrace.
+
+ We must then do the actual transformation also in DFGByteCodeParser, to avoid code motion moving code out of the body of what will become a loop.
+ The transformation is entirely contained in handleRecursiveTailCall, which is hooked to the inlining machinery.
+
+ * bytecode/CodeBlock.h:
+ (JSC::CodeBlock::hasTailCalls const):
+ * bytecode/PreciseJumpTargets.cpp:
+ (JSC::getJumpTargetsForBytecodeOffset):
+ (JSC::computePreciseJumpTargetsInternal):
+ * bytecode/UnlinkedCodeBlock.cpp:
+ (JSC::UnlinkedCodeBlock::UnlinkedCodeBlock):
+ * bytecode/UnlinkedCodeBlock.h:
+ (JSC::UnlinkedCodeBlock::hasTailCalls const):
+ (JSC::UnlinkedCodeBlock::setHasTailCalls):
+ * bytecompiler/BytecodeGenerator.cpp:
+ (JSC::BytecodeGenerator::emitEnter):
+ (JSC::BytecodeGenerator::emitCallInTailPosition):
+ * dfg/DFGByteCodeParser.cpp:
+ (JSC::DFG::ByteCodeParser::allocateTargetableBlock):
+ (JSC::DFG::ByteCodeParser::makeBlockTargetable):
+ (JSC::DFG::ByteCodeParser::handleCall):
+ (JSC::DFG::ByteCodeParser::handleRecursiveTailCall):
+ (JSC::DFG::ByteCodeParser::parseBlock):
+ (JSC::DFG::ByteCodeParser::parse):
+
2017-10-18 Mark Lam <mark....@apple.com>
RegExpObject::defineOwnProperty() does not need to compare values if no descriptor value is specified.
Modified: trunk/Source/_javascript_Core/bytecode/CodeBlock.h (223690 => 223691)
--- trunk/Source/_javascript_Core/bytecode/CodeBlock.h 2017-10-19 16:10:47 UTC (rev 223690)
+++ trunk/Source/_javascript_Core/bytecode/CodeBlock.h 2017-10-19 16:27:44 UTC (rev 223691)
@@ -742,7 +742,7 @@
// Call this to cause an optimization trigger to fire soon, but
// not necessarily the next one. This makes sense if optimization
- // succeeds. Successfuly optimization means that all calls are
+ // succeeds. Successful optimization means that all calls are
// relinked to the optimized code, so this only affects call
// frames that are still executing this CodeBlock. The value here
// is tuned to strike a balance between the cost of OSR entry
@@ -925,6 +925,8 @@
std::optional<CodeOrigin> findPC(void* pc);
#endif
+ bool hasTailCalls() const { return m_unlinkedCode->hasTailCalls(); }
+
protected:
void finalizeLLIntInlineCaches();
void finalizeBaselineJITInlineCaches();
Modified: trunk/Source/_javascript_Core/bytecode/PreciseJumpTargets.cpp (223690 => 223691)
--- trunk/Source/_javascript_Core/bytecode/PreciseJumpTargets.cpp 2017-10-19 16:10:47 UTC (rev 223690)
+++ trunk/Source/_javascript_Core/bytecode/PreciseJumpTargets.cpp 2017-10-19 16:27:44 UTC (rev 223691)
@@ -42,6 +42,11 @@
// op_loop_hint does not have jump target stored in bytecode instructions.
if (opcodeID == op_loop_hint)
out.append(bytecodeOffset);
+ else if (opcodeID == op_enter && codeBlock->hasTailCalls()) {
+ // We need to insert a jump after op_enter, so recursive tail calls have somewhere to jump to.
+ // But we only want to pay that price for functions that have at least one tail call.
+ out.append(bytecodeOffset + opcodeLengths[op_enter]);
+ }
}
enum class ComputePreciseJumpTargetsMode {
@@ -53,9 +58,8 @@
void computePreciseJumpTargetsInternal(Block* codeBlock, Instruction* instructionsBegin, unsigned instructionCount, Vector<unsigned, vectorSize>& out)
{
ASSERT(out.isEmpty());
-
- // We will derive a superset of the jump targets that the code block thinks it has.
- // So, if the code block claims there are none, then we are done.
+
+ // The code block has a superset of the jump targets. So if it claims to have none, we are done.
if (Mode == ComputePreciseJumpTargetsMode::FollowCodeBlockClaim && !codeBlock->numberOfJumpTargets())
return;
Modified: trunk/Source/_javascript_Core/bytecode/UnlinkedCodeBlock.cpp (223690 => 223691)
--- trunk/Source/_javascript_Core/bytecode/UnlinkedCodeBlock.cpp 2017-10-19 16:10:47 UTC (rev 223690)
+++ trunk/Source/_javascript_Core/bytecode/UnlinkedCodeBlock.cpp 2017-10-19 16:27:44 UTC (rev 223691)
@@ -70,6 +70,7 @@
, m_constructorKind(static_cast<unsigned>(info.constructorKind()))
, m_derivedContextType(static_cast<unsigned>(info.derivedContextType()))
, m_evalContextType(static_cast<unsigned>(info.evalContextType()))
+ , m_hasTailCalls(false)
, m_lineCount(0)
, m_endColumn(UINT_MAX)
, m_didOptimize(MixedTriState)
Modified: trunk/Source/_javascript_Core/bytecode/UnlinkedCodeBlock.h (223690 => 223691)
--- trunk/Source/_javascript_Core/bytecode/UnlinkedCodeBlock.h 2017-10-19 16:10:47 UTC (rev 223690)
+++ trunk/Source/_javascript_Core/bytecode/UnlinkedCodeBlock.h 2017-10-19 16:27:44 UTC (rev 223691)
@@ -129,6 +129,8 @@
EvalContextType evalContextType() const { return static_cast<EvalContextType>(m_evalContextType); }
bool isArrowFunctionContext() const { return m_isArrowFunctionContext; }
bool isClassContext() const { return m_isClassContext; }
+ bool hasTailCalls() const { return m_hasTailCalls; }
+ void setHasTailCalls() { m_hasTailCalls = true; }
void addExpressionInfo(unsigned instructionOffset, int divot,
int startOffset, int endOffset, unsigned line, unsigned column);
@@ -442,6 +444,7 @@
unsigned m_constructorKind : 2;
unsigned m_derivedContextType : 2;
unsigned m_evalContextType : 2;
+ unsigned m_hasTailCalls : 1;
unsigned m_lineCount;
unsigned m_endColumn;
Modified: trunk/Source/_javascript_Core/bytecompiler/BytecodeGenerator.cpp (223690 => 223691)
--- trunk/Source/_javascript_Core/bytecompiler/BytecodeGenerator.cpp 2017-10-19 16:10:47 UTC (rev 223690)
+++ trunk/Source/_javascript_Core/bytecompiler/BytecodeGenerator.cpp 2017-10-19 16:27:44 UTC (rev 223691)
@@ -1313,6 +1313,12 @@
void BytecodeGenerator::emitEnter()
{
emitOpcode(op_enter);
+
+ // We must add the end of op_enter as a potential jump target, because the bytecode parser may decide to split its basic block
+ // to have somewhere to jump to if there is a recursive tail-call that points to this function.
+ m_codeBlock->addJumpTarget(instructions().size());
+ // This disables peephole optimizations when an instruction is a jump target
+ m_lastOpcodeID = op_end;
}
void BytecodeGenerator::emitLoopHint()
@@ -3347,7 +3353,11 @@
RegisterID* BytecodeGenerator::emitCallInTailPosition(RegisterID* dst, RegisterID* func, ExpectedFunction expectedFunction, CallArguments& callArguments, const JSTextPosition& divot, const JSTextPosition& divotStart, const JSTextPosition& divotEnd, DebuggableCall debuggableCall)
{
- return emitCall(m_inTailPosition ? op_tail_call : op_call, dst, func, expectedFunction, callArguments, divot, divotStart, divotEnd, debuggableCall);
+ if (m_inTailPosition) {
+ m_codeBlock->setHasTailCalls();
+ return emitCall(op_tail_call, dst, func, expectedFunction, callArguments, divot, divotStart, divotEnd, debuggableCall);
+ }
+ return emitCall(op_call, dst, func, expectedFunction, callArguments, divot, divotStart, divotEnd, debuggableCall);
}
RegisterID* BytecodeGenerator::emitCallEval(RegisterID* dst, RegisterID* func, CallArguments& callArguments, const JSTextPosition& divot, const JSTextPosition& divotStart, const JSTextPosition& divotEnd, DebuggableCall debuggableCall)
Modified: trunk/Source/_javascript_Core/dfg/DFGByteCodeParser.cpp (223690 => 223691)
--- trunk/Source/_javascript_Core/dfg/DFGByteCodeParser.cpp 2017-10-19 16:10:47 UTC (rev 223690)
+++ trunk/Source/_javascript_Core/dfg/DFGByteCodeParser.cpp 2017-10-19 16:27:44 UTC (rev 223691)
@@ -224,6 +224,7 @@
void emitFunctionChecks(CallVariant, Node* callTarget, VirtualRegister thisArgumnt);
void emitArgumentPhantoms(int registerOffset, int argumentCountIncludingThis);
Node* getArgumentCount();
+ bool handleRecursiveTailCall(Node* callTargetNode, const CallLinkStatus&, int registerOffset, VirtualRegister thisArgument, int argumentCountIncludingThis);
unsigned inliningCost(CallVariant, int argumentCountIncludingThis, InlineCallFrame::Kind); // Return UINT_MAX if it's not an inlining candidate. By convention, intrinsics have a cost of 1.
// Handle inlining. Return true if it succeeded, false if we need to plant a call.
bool handleInlining(Node* callTargetNode, int resultOperand, const CallLinkStatus&, int registerOffset, VirtualRegister thisArgument, VirtualRegister argumentsArgument, unsigned argumentsOffset, int argumentCountIncludingThis, unsigned nextOffset, NodeType callOp, InlineCallFrame::Kind, SpeculatedType prediction);
@@ -231,7 +232,6 @@
bool attemptToInlineCall(Node* callTargetNode, int resultOperand, CallVariant, int registerOffset, int argumentCountIncludingThis, unsigned nextOffset, InlineCallFrame::Kind, SpeculatedType prediction, unsigned& inliningBalance, BasicBlock* continuationBlock, const ChecksFunctor& insertChecks);
template<typename ChecksFunctor>
void inlineCall(Node* callTargetNode, int resultOperand, CallVariant, int registerOffset, int argumentCountIncludingThis, unsigned nextOffset, InlineCallFrame::Kind, BasicBlock* continuationBlock, const ChecksFunctor& insertChecks);
- void cancelLinkingForBlock(InlineStackEntry*, BasicBlock*); // Only works when the given block is the last one to have been added for that inline stack entry.
// Handle intrinsic functions. Return true if it succeeded, false if we need to plant a call.
template<typename ChecksFunctor>
bool handleIntrinsicCall(Node* callee, int resultOperand, Intrinsic, int registerOffset, int argumentCountIncludingThis, SpeculatedType prediction, const ChecksFunctor& insertChecks);
@@ -1127,9 +1127,6 @@
// cannot have two blocks that have the same bytecodeBegin.
Vector<BasicBlock*> m_blockLinkingTargets;
- // This is set by op_enter in parseBlock(), and indicates the first block of the function.
- BasicBlock* m_entryBlock;
-
// Optional: a continuation block for returns to jump to. It is set by early returns if it does not exist.
BasicBlock* m_continuationBlock;
@@ -1217,6 +1214,9 @@
ASSERT(bytecodeIndex != UINT_MAX);
Ref<BasicBlock> block = adoptRef(*new BasicBlock(bytecodeIndex, m_numArguments, m_numLocals, 1));
BasicBlock* blockPtr = block.ptr();
+ // m_blockLinkingTargets must always be sorted in increasing order of bytecodeBegin
+ if (m_inlineStackTop->m_blockLinkingTargets.size())
+ ASSERT(m_inlineStackTop->m_blockLinkingTargets.last()->bytecodeBegin < bytecodeIndex);
m_inlineStackTop->m_blockLinkingTargets.append(blockPtr);
m_graph.appendBlock(WTFMove(block));
return blockPtr;
@@ -1234,6 +1234,9 @@
{
ASSERT(block->bytecodeBegin == UINT_MAX);
block->bytecodeBegin = bytecodeIndex;
+ // m_blockLinkingTargets must always be sorted in increasing order of bytecodeBegin
+ if (m_inlineStackTop->m_blockLinkingTargets.size())
+ ASSERT(m_inlineStackTop->m_blockLinkingTargets.last()->bytecodeBegin < bytecodeIndex);
m_inlineStackTop->m_blockLinkingTargets.append(block);
}
@@ -1295,6 +1298,9 @@
if (callLinkStatus.canOptimize()) {
VirtualRegister thisArgument = virtualRegisterForArgument(0, registerOffset);
+ if (op == TailCall && handleRecursiveTailCall(callTarget, callLinkStatus, registerOffset, thisArgument, argumentCountIncludingThis))
+ return Terminal;
+
// Inlining is quite complex, and managed by a pipeline of functions:
// handle(Varargs)Call -> handleInlining -> attemptToInlineCall -> inlineCall
// - handleCall and handleVarargsCall deal with the case where no inlining happens, and do some sanity checks on their arguments
@@ -1412,6 +1418,74 @@
addToGraph(Phantom, get(virtualRegisterForArgument(i, registerOffset)));
}
+bool ByteCodeParser::handleRecursiveTailCall(Node* callTargetNode, const CallLinkStatus& callLinkStatus, int registerOffset, VirtualRegister thisArgument, int argumentCountIncludingThis)
+{
+ // FIXME: We currently only do this optimisation in the simple, non-polymorphic case.
+ // https://bugs.webkit.org/show_bug.cgi?id=178390
+ if (callLinkStatus.couldTakeSlowPath() || callLinkStatus.size() != 1)
+ return false;
+
+ auto targetExecutable = callLinkStatus[0].executable();
+ InlineStackEntry* stackEntry = m_inlineStackTop;
+ do {
+ if (targetExecutable != stackEntry->executable())
+ continue;
+ VERBOSE_LOG(" We found a recursive tail call, trying to optimize it into a jump.\n");
+
+ if (auto* callFrame = stackEntry->m_inlineCallFrame) {
+ // Some code may statically use the argument count from the InlineCallFrame, so it would be invalid to loop back if it does not match.
+ // We "continue" instead of returning false in case another stack entry further on the stack has the right number of arguments.
+ if (argumentCountIncludingThis != static_cast<int>(callFrame->argumentCountIncludingThis))
+ continue;
+ } else {
+ // We are in the machine code entry (i.e. the original caller).
+ // If we have more arguments than the number of parameters to the function, it is not clear where we could put them on the stack.
+ if (argumentCountIncludingThis > m_codeBlock->numParameters())
+ return false;
+ }
+
+ // We must add some check that the profiling information was correct and the target of this call is what we thought
+ emitFunctionChecks(callLinkStatus[0], callTargetNode, thisArgument);
+ // We flush everything, as if we were in the backedge of a loop (see treatment of op_jmp in parseBlock).
+ flushForTerminal();
+
+ // We must set the arguments to the right values
+ int argIndex = 0;
+ for (; argIndex < argumentCountIncludingThis; ++argIndex) {
+ Node* value = get(virtualRegisterForArgument(argIndex, registerOffset));
+ setDirect(stackEntry->remapOperand(virtualRegisterForArgument(argIndex)), value, NormalSet);
+ }
+ Node* undefined = addToGraph(JSConstant, OpInfo(m_constantUndefined));
+ for (; argIndex < stackEntry->m_codeBlock->numParameters(); ++argIndex)
+ setDirect(stackEntry->remapOperand(virtualRegisterForArgument(argIndex)), undefined, NormalSet);
+
+ // We must repeat the work of op_enter here as we will jump right after it.
+ // We jump right after it and not before it, because of some invariant saying that a CFG root cannot have predecessors in the IR.
+ for (int i = 0; i < stackEntry->m_codeBlock->m_numVars; ++i)
+ setDirect(stackEntry->remapOperand(virtualRegisterForLocal(i)), undefined, NormalSet);
+
+ // We want to emit the SetLocals with an exit origin that points to the place we are jumping to.
+ unsigned oldIndex = m_currentIndex;
+ auto oldStackTop = m_inlineStackTop;
+ m_inlineStackTop = stackEntry;
+ m_currentIndex = OPCODE_LENGTH(op_enter);
+ m_exitOK = true;
+ processSetLocalQueue();
+ m_currentIndex = oldIndex;
+ m_inlineStackTop = oldStackTop;
+ m_exitOK = false;
+
+ BasicBlock** entryBlockPtr = tryBinarySearch<BasicBlock*, unsigned>(stackEntry->m_blockLinkingTargets, stackEntry->m_blockLinkingTargets.size(), OPCODE_LENGTH(op_enter), getBytecodeBeginForBlock);
+ RELEASE_ASSERT(entryBlockPtr);
+ addJumpTo(*entryBlockPtr);
+ return true;
+ // It would be unsound to jump over a non-tail call: the "tail" call is not really a tail call in that case.
+ } while (stackEntry->m_inlineCallFrame && stackEntry->m_inlineCallFrame->kind == TailCall && (stackEntry = stackEntry->m_caller));
+
+ // The tail call was not recursive
+ return false;
+}
+
unsigned ByteCodeParser::inliningCost(CallVariant callee, int argumentCountIncludingThis, InlineCallFrame::Kind kind)
{
CallMode callMode = InlineCallFrame::callModeFor(kind);
@@ -4214,8 +4288,6 @@
for (int i = 0; i < m_inlineStackTop->m_codeBlock->m_numVars; ++i)
set(virtualRegisterForLocal(i), undefined, ImmediateNakedSet);
- m_inlineStackTop->m_entryBlock = m_currentBlock;
-
NEXT_OPCODE(op_enter);
}
@@ -5402,7 +5474,8 @@
if (terminality == NonTerminal)
NEXT_OPCODE(op_tail_call);
else
- LAST_OPCODE(op_tail_call);
+ LAST_OPCODE_LINKED(op_tail_call);
+ // We use LAST_OPCODE_LINKED instead of LAST_OPCODE because if the tail call was optimized, it may now be a jump to a bytecode index in a different InlineStackEntry.
}
case op_construct:
@@ -6430,8 +6503,6 @@
linkBlocks(inlineStackEntry.m_unlinkedBlocks, inlineStackEntry.m_blockLinkingTargets);
m_graph.determineReachability();
- if (Options::verboseDFGBytecodeParsing())
- m_graph.dump();
m_graph.killUnreachableBlocks();
for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {