https://github.com/python/cpython/commit/f1883852ed4cd1923e619e71437641d06873503d
commit: f1883852ed4cd1923e619e71437641d06873503d
branch: main
author: Mark Shannon <[email protected]>
committer: markshannon <[email protected]>
date: 2025-10-17T11:26:17+01:00
summary:

GH-135904: Implement assembler optimization for AArch64. (GH-139855)

files:
M Python/jit.c
M Tools/jit/_optimizers.py
M Tools/jit/_schema.py
M Tools/jit/_stencils.py
M Tools/jit/_targets.py

diff --git a/Python/jit.c b/Python/jit.c
index 01ec9c1fa6e8a9..ebd0d90385e002 100644
--- a/Python/jit.c
+++ b/Python/jit.c
@@ -167,11 +167,13 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start,
 
 // See https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions
 // for instruction encodings:
-#define IS_AARCH64_ADD_OR_SUB(I) (((I) & 0x11C00000) == 0x11000000)
-#define IS_AARCH64_ADRP(I)       (((I) & 0x9F000000) == 0x90000000)
-#define IS_AARCH64_BRANCH(I)     (((I) & 0x7C000000) == 0x14000000)
-#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000)
-#define IS_AARCH64_MOV(I)        (((I) & 0x9F800000) == 0x92800000)
+#define IS_AARCH64_ADD_OR_SUB(I)  (((I) & 0x11C00000) == 0x11000000)
+#define IS_AARCH64_ADRP(I)        (((I) & 0x9F000000) == 0x90000000)
+#define IS_AARCH64_BRANCH(I)      (((I) & 0x7C000000) == 0x14000000)
+#define IS_AARCH64_BRANCH_COND(I) (((I) & 0x7C000000) == 0x54000000)
+#define IS_AARCH64_TEST_AND_BRANCH(I) (((I) & 0x7E000000) == 0x36000000)
+#define IS_AARCH64_LDR_OR_STR(I)  (((I) & 0x3B000000) == 0x39000000)
+#define IS_AARCH64_MOV(I)         (((I) & 0x9F800000) == 0x92800000)
 
 // LLD is a great reference for performing relocations... just keep in
 // mind that Tools/jit/build.py does filtering and preprocessing for us!
@@ -332,6 +334,21 @@ patch_aarch64_21rx(unsigned char *location, uint64_t value)
     patch_aarch64_21r(location, value);
 }
 
+// 21-bit relative branch.
+void
+patch_aarch64_19r(unsigned char *location, uint64_t value)
+{
+    uint32_t *loc32 = (uint32_t *)location;
+    assert(IS_AARCH64_BRANCH_COND(*loc32));
+    value -= (uintptr_t)location;
+    // Check that we're not out of range of 21 signed bits:
+    assert((int64_t)value >= -(1 << 20));
+    assert((int64_t)value < (1 << 20));
+    // Since instructions are 4-byte aligned, only use 19 bits:
+    assert(get_bits(value, 0, 2) == 0);
+    set_bits(loc32, 5, value, 2, 19);
+}
+
 // 28-bit relative branch.
 void
 patch_aarch64_26r(unsigned char *location, uint64_t value)
diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py
index 33db110b728dba..866417398b0ba5 100644
--- a/Tools/jit/_optimizers.py
+++ b/Tools/jit/_optimizers.py
@@ -39,6 +39,34 @@
 # Update with all of the inverted branches, too:
 _X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v}
 
+_AARCH64_COND_CODES = {
+    # https://developer.arm.com/documentation/dui0801/b/CJAJIHAD?lang=en
+    "eq": "ne",
+    "ne": "eq",
+    "lt": "ge",
+    "ge": "lt",
+    "gt": "le",
+    "le": "gt",
+    "vs": "vc",
+    "vc": "vs",
+    "mi": "pl",
+    "pl": "mi",
+    "cs": "cc",
+    "cc": "cs",
+    "hs": "lo",
+    "lo": "hs",
+    "hi": "ls",
+    "ls": "hi",
+}
+# Branches are either b.{cond} or bc.{cond}
+_AARCH64_BRANCHES = {
+    "b." + cond: ("b." + inverse if inverse else None)
+    for (cond, inverse) in _AARCH64_COND_CODES.items()
+} | {
+    "bc." + cond: ("bc." + inverse if inverse else None)
+    for (cond, inverse) in _AARCH64_COND_CODES.items()
+}
+
 
 @dataclasses.dataclass
 class _Block:
@@ -283,11 +311,26 @@ def run(self) -> None:
         self.path.write_text(self._body())
 
 
+# Mach-O does not support the 19 bit branch locations needed for branch reordering
+class OptimizerAArch64_MachO(Optimizer):  # pylint: disable = too-few-public-methods
+    """aarch64-apple-darwin"""
+
+    # https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch-
+    _re_jump = re.compile(r"\s*b\s+(?P<target>[\w.]+)")
+
+
 class OptimizerAArch64(Optimizer):  # pylint: disable = too-few-public-methods
-    """aarch64-apple-darwin/aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu"""
+    """aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu"""
+
+    _branches = _AARCH64_BRANCHES
+    _re_branch = re.compile(
+        rf"\s*(?P<instruction>{'|'.join(_AARCH64_BRANCHES)})\s+(.+,\s+)*(?P<target>[\w.]+)"
+    )
 
     # https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch-
     _re_jump = re.compile(r"\s*b\s+(?P<target>[\w.]+)")
+    # https://developer.arm.com/documentation/ddi0602/2025-09/Base-Instructions/RET--Return-from-subroutine-
+    _re_return = re.compile(r"\s*ret\b")
 
 
 class OptimizerX86(Optimizer):  # pylint: disable = too-few-public-methods
diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py
index 228fc389584dd7..c47e9af924a20e 100644
--- a/Tools/jit/_schema.py
+++ b/Tools/jit/_schema.py
@@ -10,6 +10,7 @@
     "ARM64_RELOC_PAGEOFF12",
     "ARM64_RELOC_UNSIGNED",
     "IMAGE_REL_AMD64_REL32",
+    "IMAGE_REL_ARM64_BRANCH19",
     "IMAGE_REL_ARM64_BRANCH26",
     "IMAGE_REL_ARM64_PAGEBASE_REL21",
     "IMAGE_REL_ARM64_PAGEOFFSET_12A",
@@ -20,6 +21,7 @@
     "R_AARCH64_ADR_GOT_PAGE",
     "R_AARCH64_ADR_PREL_PG_HI21",
     "R_AARCH64_CALL26",
+    "R_AARCH64_CONDBR19",
     "R_AARCH64_JUMP26",
     "R_AARCH64_ADD_ABS_LO12_NC",
     "R_AARCH64_LD64_GOT_LO12_NC",
diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py
index 14606b036db519..16bc1ea4e17e6b 100644
--- a/Tools/jit/_stencils.py
+++ b/Tools/jit/_stencils.py
@@ -61,6 +61,7 @@ class HoleValue(enum.Enum):
     # x86_64-pc-windows-msvc:
     "IMAGE_REL_AMD64_REL32": "patch_x86_64_32rx",
     # aarch64-pc-windows-msvc:
+    "IMAGE_REL_ARM64_BRANCH19": "patch_aarch64_19r",
     "IMAGE_REL_ARM64_BRANCH26": "patch_aarch64_26r",
     "IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21rx",
     "IMAGE_REL_ARM64_PAGEOFFSET_12A": "patch_aarch64_12",
@@ -74,6 +75,7 @@ class HoleValue(enum.Enum):
     "R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21rx",
     "R_AARCH64_ADR_PREL_PG_HI21": "patch_aarch64_21r",
     "R_AARCH64_CALL26": "patch_aarch64_26r",
+    "R_AARCH64_CONDBR19": "patch_aarch64_19r",
     "R_AARCH64_JUMP26": "patch_aarch64_26r",
     "R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12x",
     "R_AARCH64_MOVW_UABS_G0_NC": "patch_aarch64_16a",
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index 9fc3522d23d982..7ff7c4fba49652 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -335,7 +335,8 @@ def _handle_relocation(
                 "Offset": offset,
                 "Symbol": s,
                 "Type": {
-                    "Name": "IMAGE_REL_ARM64_BRANCH26"
+                    "Name": "IMAGE_REL_ARM64_BRANCH19"
+                    | "IMAGE_REL_ARM64_BRANCH26"
                     | "IMAGE_REL_ARM64_PAGEBASE_REL21"
                     | "IMAGE_REL_ARM64_PAGEOFFSET_12A"
                     | "IMAGE_REL_ARM64_PAGEOFFSET_12L" as kind
@@ -564,7 +565,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
     if re.fullmatch(r"aarch64-apple-darwin.*", host):
         host = "aarch64-apple-darwin"
         condition = "defined(__aarch64__) && defined(__APPLE__)"
-        optimizer = _optimizers.OptimizerAArch64
+        optimizer = _optimizers.OptimizerAArch64_MachO
         target = _MachO(host, condition, optimizer=optimizer)
     elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
         host = "aarch64-pc-windows-msvc"

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to