https://github.com/python/cpython/commit/51185923a8dfdb59fc04f235fd19881d10d65acf
commit: 51185923a8dfdb59fc04f235fd19881d10d65acf
branch: main
author: Brandt Bucher <[email protected]>
committer: brandtbucher <[email protected]>
date: 2024-08-14T07:53:46-07:00
summary:
GH-113464: Speed up JIT builds (GH-122839)
files:
M Tools/jit/_targets.py
M Tools/jit/template.c
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index 73d10a128756eb..e37ee943999785 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -182,15 +182,27 @@ async def _compile(
async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
- opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases))
+ cases_and_opnames = sorted(
+ re.findall(
+ r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL
+ )
+ )
tasks = []
with tempfile.TemporaryDirectory() as tempdir:
work = pathlib.Path(tempdir).resolve()
async with asyncio.TaskGroup() as group:
coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
tasks.append(group.create_task(coro, name="trampoline"))
- for opname in opnames:
- coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work)
+ template = TOOLS_JIT_TEMPLATE_C.read_text()
+ for case, opname in cases_and_opnames:
+ # Write out a copy of the template with *only* this case
+ # inserted. This is about twice as fast as #include'ing all
+ # of executor_cases.c.h each time we compile (since the C
+ # compiler wastes a bunch of time parsing the dead code for
+ # all of the other cases):
+ c = work / f"{opname}.c"
+ c.write_text(template.replace("CASE", case))
+ coro = self._compile(opname, c, work)
tasks.append(group.create_task(coro, name=opname))
return {task.get_name(): task.result() for task in tasks}
diff --git a/Tools/jit/template.c b/Tools/jit/template.c
index ec7d033e89deff..6cf15085f79933 100644
--- a/Tools/jit/template.c
+++ b/Tools/jit/template.c
@@ -84,6 +84,8 @@ do { \
#undef WITHIN_STACK_BOUNDS
#define WITHIN_STACK_BOUNDS() 1
+#define TIER_TWO 2
+
_Py_CODEUNIT *
_JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate)
{
@@ -107,9 +109,9 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState
OPT_STAT_INC(uops_executed);
UOP_STAT_INC(uopcode, execution_count);
- // The actual instruction definitions (only one will be used):
switch (uopcode) {
-#include "executor_cases.c.h"
+ // The actual instruction definition gets inserted here:
+ CASE
default:
Py_UNREACHABLE();
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]