Hi

A small set of functions is, by far, the most used in queries. The llvmjit
inlining system tries to get rid of the overhead of calling any function, but
it is triggered only when the cost is above jit_inline_above_cost. There is
thus a nice performance boost to be had by "manually" inlining a few specific
PostgreSQL functions.
In this first patch, I've chosen to inline int4eq, int8eq and most date 
comparison functions.
They are definitely small: the generated code is not bigger when inlining them 
(surprisingly, on amd64 the code is even smaller), and they are unlikely to 
ever change, so there won't be any maintenance burden here.
On my AMD 3900XT system, this gives me a 3 to 5% performance improvement when 
running 1M dateeq calls.
I will likely submit a second patch version later with more functions 
implemented, but I thought this was an interesting first result worth 
discussing already.

Regards
From 43d4f42cc07eb0f7ca5ccb06480acce02695f4fe Mon Sep 17 00:00:00 2001
From: Pierre Ducroquet <[email protected]>
Date: Fri, 23 Jan 2026 17:29:02 +0100
Subject: [PATCH] llvmjit: introduce force-inlined functions

A few functions are, by far, the most used functions in PostgreSQL
queries. These are comparison functions (mostly integer and date).
Today, when these are called through llvmjit and we are not above
jit_inline_above_cost, they are not inlined: they suffer the cost of
the fmgr function call "ABI" and are black boxes to the LLVM
optimizer, even when running with O0.
Moreover, these functions are short enough that always inlining them
does not increase the generated code size. On amd64, the code is
increased by one instruction but reduced by 11 bytes, so this does
not matter much.

But since this allows a net reduction in memory accesses, there are
immediate benefits to this, and the compilation time impact is not
measurable.

On my AMD 3900XT system, on a simple query running 1 million dateeq,
I get a 3.5 to 5% speed increase, even against the current inlining
system.

Note that this is the first version of this patch. I plan to add more
functions, but I am careful that each one added does not have a
negative impact on the generated machine code.
---
 src/backend/jit/llvm/llvmjit_expr.c | 107 ++++++++++++++++++++++++++--
 1 file changed, 103 insertions(+), 4 deletions(-)

diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c
index 650f1d42a93..528adf2ce72 100644
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -70,6 +70,102 @@ static LLVMValueRef create_LifetimeEnd(LLVMModuleRef mod);
 					   lengthof(((LLVMValueRef[]){__VA_ARGS__})), \
 					   ((LLVMValueRef[]){__VA_ARGS__}))
 
+static bool
+llvm_try_inline_function_call(LLVMBuilderRef b, FunctionCallInfo fcinfo, LLVMValueRef v_resvaluep, LLVMValueRef v_resnullp)
+{	/* Emits inline IR for a few known comparison functions (matched on fn_addr), storing the result and a false null flag; returns false, emitting no compare/store, when fn_addr matches none of them. */
+	LLVMValueRef v_fcinfo, v_retval;
+	v_fcinfo = l_ptr_const(fcinfo, l_ptr(StructFunctionCallInfoData));
+	if (fcinfo->flinfo->fn_addr == &int4eq) {
+		/* load arg 0 as a 64-bit value and truncate it to 32 bits; NOTE(review): LLVMInt64Type() is the global-context type, confirm it matches this module's context */
+		LLVMValueRef arg0_ptr = l_funcvaluep(b, v_fcinfo, 0);
+		LLVMValueRef arg0 = LLVMBuildLoad2(b, LLVMInt64Type(), arg0_ptr, "arg0_64");
+		LLVMValueRef arg0_truncated = LLVMBuildTrunc(b, arg0, LLVMInt32Type(), "arg0_32");
+		/* load arg 1 the same way */
+		LLVMValueRef arg1_ptr = l_funcvaluep(b, v_fcinfo, 1);
+		LLVMValueRef arg1 = LLVMBuildLoad2(b, LLVMInt64Type(), arg1_ptr, "arg1_64");
+		LLVMValueRef arg1_truncated = LLVMBuildTrunc(b, arg1, LLVMInt32Type(), "arg1_32");
+		/* equality compare on the 32-bit values, then zero-extend the result to 64 bits before storing it */
+		LLVMValueRef cmp = LLVMBuildICmp(b, LLVMIntEQ, arg0_truncated, arg1_truncated, "int_eq");
+		v_retval = LLVMBuildZExt(b, cmp, LLVMInt64Type(), "int_eq_ext");
+		LLVMBuildStore(b, v_retval, v_resvaluep);
+		LLVMBuildStore(b, l_sbool_const(0), v_resnullp);
+		return true;
+	} else if (fcinfo->flinfo->fn_addr == &int8eq) {
+		/* load arg 0 as a 64-bit value (no truncation in this branch) */
+		LLVMValueRef arg0_ptr = l_funcvaluep(b, v_fcinfo, 0);
+		LLVMValueRef arg0 = LLVMBuildLoad2(b, LLVMInt64Type(), arg0_ptr, "arg0_64");
+		/* load arg 1 the same way */
+		LLVMValueRef arg1_ptr = l_funcvaluep(b, v_fcinfo, 1);
+		LLVMValueRef arg1 = LLVMBuildLoad2(b, LLVMInt64Type(), arg1_ptr, "arg1_64");
+		/* equality compare on the 64-bit values, then zero-extend the result to 64 bits before storing it */
+		LLVMValueRef cmp = LLVMBuildICmp(b, LLVMIntEQ, arg0, arg1, "int_eq");
+		v_retval = LLVMBuildZExt(b, cmp, LLVMInt64Type(), "int_eq_ext");
+		LLVMBuildStore(b, v_retval, v_resvaluep);
+		LLVMBuildStore(b, l_sbool_const(0), v_resnullp);
+		return true;
+	} else if (fcinfo->flinfo->fn_addr == &date_lt) {
+		/* load arg 0 as a 64-bit value and truncate it to 32 bits */
+		LLVMValueRef arg0_ptr = l_funcvaluep(b, v_fcinfo, 0);
+		LLVMValueRef arg0 = LLVMBuildLoad2(b, LLVMInt64Type(), arg0_ptr, "arg0_64");
+		LLVMValueRef arg0_truncated = LLVMBuildTrunc(b, arg0, LLVMInt32Type(), "arg0_32");
+		/* load arg 1 the same way */
+		LLVMValueRef arg1_ptr = l_funcvaluep(b, v_fcinfo, 1);
+		LLVMValueRef arg1 = LLVMBuildLoad2(b, LLVMInt64Type(), arg1_ptr, "arg1_64");
+		LLVMValueRef arg1_truncated = LLVMBuildTrunc(b, arg1, LLVMInt32Type(), "arg1_32");
+		/* signed less-than on the 32-bit values, zero-extended to 64 bits; NOTE(review): the "int_eq_ext" value name looks copy-pasted from the int4eq branch */
+		LLVMValueRef cmp = LLVMBuildICmp(b, LLVMIntSLT, arg0_truncated, arg1_truncated, "date_lt");
+		v_retval = LLVMBuildZExt(b, cmp, LLVMInt64Type(), "int_eq_ext");
+		LLVMBuildStore(b, v_retval, v_resvaluep);
+		LLVMBuildStore(b, l_sbool_const(0), v_resnullp);
+		return true;
+	} else if (fcinfo->flinfo->fn_addr == &date_gt) {
+		/* load arg 0 as a 64-bit value and truncate it to 32 bits */
+		LLVMValueRef arg0_ptr = l_funcvaluep(b, v_fcinfo, 0);
+		LLVMValueRef arg0 = LLVMBuildLoad2(b, LLVMInt64Type(), arg0_ptr, "arg0_64");
+		LLVMValueRef arg0_truncated = LLVMBuildTrunc(b, arg0, LLVMInt32Type(), "arg0_32");
+		/* load arg 1 the same way */
+		LLVMValueRef arg1_ptr = l_funcvaluep(b, v_fcinfo, 1);
+		LLVMValueRef arg1 = LLVMBuildLoad2(b, LLVMInt64Type(), arg1_ptr, "arg1_64");
+		LLVMValueRef arg1_truncated = LLVMBuildTrunc(b, arg1, LLVMInt32Type(), "arg1_32");
+		/* signed greater-than on the 32-bit values, zero-extended to 64 bits before storing */
+		LLVMValueRef cmp = LLVMBuildICmp(b, LLVMIntSGT, arg0_truncated, arg1_truncated, "date_gt");
+		v_retval = LLVMBuildZExt(b, cmp, LLVMInt64Type(), "int_eq_ext");
+		LLVMBuildStore(b, v_retval, v_resvaluep);
+		LLVMBuildStore(b, l_sbool_const(0), v_resnullp);
+		return true;
+	} else if (fcinfo->flinfo->fn_addr == &date_le) {
+		/* load arg 0 as a 64-bit value and truncate it to 32 bits */
+		LLVMValueRef arg0_ptr = l_funcvaluep(b, v_fcinfo, 0);
+		LLVMValueRef arg0 = LLVMBuildLoad2(b, LLVMInt64Type(), arg0_ptr, "arg0_64");
+		LLVMValueRef arg0_truncated = LLVMBuildTrunc(b, arg0, LLVMInt32Type(), "arg0_32");
+		/* load arg 1 the same way */
+		LLVMValueRef arg1_ptr = l_funcvaluep(b, v_fcinfo, 1);
+		LLVMValueRef arg1 = LLVMBuildLoad2(b, LLVMInt64Type(), arg1_ptr, "arg1_64");
+		LLVMValueRef arg1_truncated = LLVMBuildTrunc(b, arg1, LLVMInt32Type(), "arg1_32");
+		/* signed less-or-equal on the 32-bit values, zero-extended to 64 bits before storing */
+		LLVMValueRef cmp = LLVMBuildICmp(b, LLVMIntSLE, arg0_truncated, arg1_truncated, "date_le");
+		v_retval = LLVMBuildZExt(b, cmp, LLVMInt64Type(), "int_eq_ext");
+		LLVMBuildStore(b, v_retval, v_resvaluep);
+		LLVMBuildStore(b, l_sbool_const(0), v_resnullp);
+		return true;
+	} else if (fcinfo->flinfo->fn_addr == &date_ge) {
+		/* load arg 0 as a 64-bit value and truncate it to 32 bits */
+		LLVMValueRef arg0_ptr = l_funcvaluep(b, v_fcinfo, 0);
+		LLVMValueRef arg0 = LLVMBuildLoad2(b, LLVMInt64Type(), arg0_ptr, "arg0_64");
+		LLVMValueRef arg0_truncated = LLVMBuildTrunc(b, arg0, LLVMInt32Type(), "arg0_32");
+		/* load arg 1 the same way */
+		LLVMValueRef arg1_ptr = l_funcvaluep(b, v_fcinfo, 1);
+		LLVMValueRef arg1 = LLVMBuildLoad2(b, LLVMInt64Type(), arg1_ptr, "arg1_64");
+		LLVMValueRef arg1_truncated = LLVMBuildTrunc(b, arg1, LLVMInt32Type(), "arg1_32");
+		/* signed greater-or-equal on the 32-bit values, zero-extended to 64 bits before storing */
+		LLVMValueRef cmp = LLVMBuildICmp(b, LLVMIntSGE, arg0_truncated, arg1_truncated, "date_ge");
+		v_retval = LLVMBuildZExt(b, cmp, LLVMInt64Type(), "int_eq_ext");
+		LLVMBuildStore(b, v_retval, v_resvaluep);
+		LLVMBuildStore(b, l_sbool_const(0), v_resnullp);
+		return true;
+	}
+	return false;
+}
 
 /*
  * JIT compile expression.
@@ -739,10 +835,13 @@ llvm_compile_expr(ExprState *state)
 						LLVMPositionBuilderAtEnd(b, b_nonull);
 					}
 
-					v_retval = BuildV1Call(context, b, mod, fcinfo,
-										   &v_fcinfo_isnull);
-					LLVMBuildStore(b, v_retval, v_resvaluep);
-					LLVMBuildStore(b, v_fcinfo_isnull, v_resnullp);
+					if (!llvm_try_inline_function_call(b, fcinfo, v_resvaluep, v_resnullp)) {
+						/* No inlining done, do the direct call instead */
+						v_retval = BuildV1Call(context, b, mod, fcinfo,
+											   &v_fcinfo_isnull);
+						LLVMBuildStore(b, v_retval, v_resvaluep);
+						LLVMBuildStore(b, v_fcinfo_isnull, v_resnullp);
+					}
 
 					LLVMBuildBr(b, opblocks[opno + 1]);
 					break;
-- 
2.43.0

Reply via email to