This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new db5392bf fix(substrait): emit decimal literals as 16-byte
little-endian (#1130)
db5392bf is described below
commit db5392bf4f9c8fff37215fac32852483f1cd4ac7
Author: Andrei Tserakhau <[email protected]>
AuthorDate: Wed May 27 18:16:58 2026 +0200
fix(substrait): emit decimal literals as 16-byte little-endian (#1130)
DecimalLiteral.MarshalBinary returns big-endian min-byte; Substrait's
expr.NewLiteral needs 16-byte little-endian and was silently returning
nil for everything else, panicking downstream. Convert before emit and
add whitebox tests covering small/zero/16-byte-negative values.
---
table/substrait/endian_be.go | 24 ++++++++++++++
table/substrait/endian_fallback.go | 31 ++++++++++++++++++
table/substrait/endian_le.go | 24 ++++++++++++++
table/substrait/substrait.go | 18 +++++++++--
table/substrait/substrait_decimal_internal_test.go | 37 ++++++++++++++++++++++
5 files changed, 132 insertions(+), 2 deletions(-)
diff --git a/table/substrait/endian_be.go b/table/substrait/endian_be.go
new file mode 100644
index 00000000..220e7f8f
--- /dev/null
+++ b/table/substrait/endian_be.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc ||
ppc64 || s390 || s390x || shbe || sparc || sparc64
+
+package substrait
+
+// hostIsLE is a compile-time constant on known big-endian GOARCHes so the
+// compiler can dead-code-eliminate the little-endian branch in
toDecimalLiteral.
+const hostIsLE = false
diff --git a/table/substrait/endian_fallback.go
b/table/substrait/endian_fallback.go
new file mode 100644
index 00000000..d4bdff6c
--- /dev/null
+++ b/table/substrait/endian_fallback.go
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build !(386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 ||
mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh
|| wasm || armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc ||
ppc64 || s390 || s390x || shbe || sparc || sparc64)
+
+package substrait
+
+import "unsafe"
+
+// hostIsLE is a runtime probe for GOARCHes not enumerated in endian_le.go or
+// endian_be.go. The compiler cannot dead-code-eliminate the unused branch in
+// toDecimalLiteral on these architectures, but the build still works.
+var hostIsLE = func() bool {
+ var probe uint16 = 1
+
+ return *(*byte)(unsafe.Pointer(&probe)) == 1
+}()
diff --git a/table/substrait/endian_le.go b/table/substrait/endian_le.go
new file mode 100644
index 00000000..e43ced0e
--- /dev/null
+++ b/table/substrait/endian_le.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 ||
mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh
|| wasm
+
+package substrait
+
+// hostIsLE is a compile-time constant on known little-endian GOARCHes so the
+// compiler can dead-code-eliminate the big-endian branch in toDecimalLiteral.
+const hostIsLE = true
diff --git a/table/substrait/substrait.go b/table/substrait/substrait.go
index 6876893e..25bec917 100644
--- a/table/substrait/substrait.go
+++ b/table/substrait/substrait.go
@@ -19,8 +19,10 @@ package substrait
import (
_ "embed"
+ "encoding/binary"
"fmt"
"strings"
+ "unsafe"
"github.com/apache/arrow-go/v18/arrow/compute/exprs"
"github.com/apache/iceberg-go"
@@ -270,10 +272,22 @@ func toDecimalLiteral(typ iceberg.Type, v
iceberg.DecimalLiteral) expr.Literal {
precision = dt.Precision()
}
- byts, _ := v.MarshalBinary()
+ // decimal128.Num is {lo uint64, hi int64} — its raw in-memory bytes on
+ // LE hosts are already the 16-byte little-endian two's-complement
+ // representation Substrait wants. hostIsLE is a build-tag const, so the
+ // compiler drops the unused branch on every known GOARCH.
+ n := v.Value().Val
+ buf := make([]byte, 16)
+ if hostIsLE {
+ copy(buf, unsafe.Slice((*byte)(unsafe.Pointer(&n)), 16))
+ } else {
+ binary.LittleEndian.PutUint64(buf[:8], n.LowBits())
+ binary.LittleEndian.PutUint64(buf[8:], uint64(n.HighBits()))
+ }
+
result, _ := expr.NewLiteral(&types.Decimal{
Scale: int32(v.Scale),
- Value: byts,
+ Value: buf,
Precision: int32(precision),
}, false)
diff --git a/table/substrait/substrait_decimal_internal_test.go
b/table/substrait/substrait_decimal_internal_test.go
index 60e9f99e..f0d6edf6 100644
--- a/table/substrait/substrait_decimal_internal_test.go
+++ b/table/substrait/substrait_decimal_internal_test.go
@@ -91,3 +91,40 @@ func TestToDecimalLiteralFallbackWhenTypIsNotDecimal(t
*testing.T) {
assert.Equal(t, int32(9), dt.Precision)
assert.Equal(t, int32(4), dt.Scale)
}
+
+// TestToDecimalLiteralRealisticValues exercises values whose MarshalBinary
+// output is shorter than 16 bytes. expr.NewLiteral rejects non-16-byte input
+// and DecimalLiteral.MarshalBinary uses the minimum byte representation, so
+// before the BE-min → LE-16 conversion these cases returned nil and panicked
+// in the caller. ValueString uses substrait-go's own LE decoder; matching it
+// against the expected string confirms the bytes are spec-conformant
+// little-endian two's complement.
+func TestToDecimalLiteralRealisticValues(t *testing.T) {
+ cases := []struct {
+ name string
+ val decimal128.Num
+ scale int
+ expected string
+ }{
+ {"positive small 1.0000", decimal128.New(0, 10000), 4,
"1.0000"},
+ {"positive 12345.67", decimal128.New(0, 1234567), 2,
"12345.67"},
+ {"zero", decimal128.New(0, 0), 4, "0.0000"},
+ {"negative -1.0000", decimal128.FromI64(-10000), 4, "-1.0000"},
+ {"negative -12345.67", decimal128.FromI64(-1234567), 2,
"-12345.67"},
+ // 16-byte negative: MarshalBinary returns exactly 16 bytes, so
the
+ // 0xff sign-extend fill must be fully overwritten by the copy.
+ {"negative -2^120", decimal128.New(-(1 << 56), 0), 0,
"-1329227995784915872903807060280344576"},
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ fieldType := iceberg.DecimalTypeOf(18, tc.scale)
+ lit := iceberg.DecimalLiteral{Val: tc.val, Scale:
tc.scale}
+
+ got := toDecimalLiteral(fieldType, lit)
+ require.NotNil(t, got, "toDecimalLiteral must not
return nil for sub-16-byte values")
+
+ assert.Equal(t, tc.expected, got.ValueString())
+ })
+ }
+}