This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new db5392bf fix(substrait): emit decimal literals as 16-byte 
little-endian (#1130)
db5392bf is described below

commit db5392bf4f9c8fff37215fac32852483f1cd4ac7
Author: Andrei Tserakhau <[email protected]>
AuthorDate: Wed May 27 18:16:58 2026 +0200

    fix(substrait): emit decimal literals as 16-byte little-endian (#1130)
    
    DecimalLiteral.MarshalBinary returns big-endian min-byte; Substrait's
    expr.NewLiteral needs 16-byte little-endian and was silently returning
    nil for everything else, panicking downstream. Convert before emit and
    add whitebox tests covering small/zero/16-byte-negative values.
---
 table/substrait/endian_be.go                       | 24 ++++++++++++++
 table/substrait/endian_fallback.go                 | 31 ++++++++++++++++++
 table/substrait/endian_le.go                       | 24 ++++++++++++++
 table/substrait/substrait.go                       | 18 +++++++++--
 table/substrait/substrait_decimal_internal_test.go | 37 ++++++++++++++++++++++
 5 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/table/substrait/endian_be.go b/table/substrait/endian_be.go
new file mode 100644
index 00000000..220e7f8f
--- /dev/null
+++ b/table/substrait/endian_be.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || 
ppc64 || s390 || s390x || shbe || sparc || sparc64
+
+package substrait
+
+// hostIsLE is a compile-time constant on known big-endian GOARCHes so the
+// compiler can dead-code-eliminate the little-endian branch in 
toDecimalLiteral.
+const hostIsLE = false
diff --git a/table/substrait/endian_fallback.go 
b/table/substrait/endian_fallback.go
new file mode 100644
index 00000000..d4bdff6c
--- /dev/null
+++ b/table/substrait/endian_fallback.go
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build !(386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || 
mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh 
|| wasm || armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || 
ppc64 || s390 || s390x || shbe || sparc || sparc64)
+
+package substrait
+
+import "unsafe"
+
+// hostIsLE is a runtime probe for GOARCHes not enumerated in endian_le.go or
+// endian_be.go. The compiler cannot dead-code-eliminate the unused branch in
+// toDecimalLiteral on these architectures, but the build still works.
+var hostIsLE = func() bool {
+       var probe uint16 = 1
+
+       return *(*byte)(unsafe.Pointer(&probe)) == 1
+}()
diff --git a/table/substrait/endian_le.go b/table/substrait/endian_le.go
new file mode 100644
index 00000000..e43ced0e
--- /dev/null
+++ b/table/substrait/endian_le.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || 
mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh 
|| wasm
+
+package substrait
+
+// hostIsLE is a compile-time constant on known little-endian GOARCHes so the
+// compiler can dead-code-eliminate the big-endian branch in toDecimalLiteral.
+const hostIsLE = true
diff --git a/table/substrait/substrait.go b/table/substrait/substrait.go
index 6876893e..25bec917 100644
--- a/table/substrait/substrait.go
+++ b/table/substrait/substrait.go
@@ -19,8 +19,10 @@ package substrait
 
 import (
        _ "embed"
+       "encoding/binary"
        "fmt"
        "strings"
+       "unsafe"
 
        "github.com/apache/arrow-go/v18/arrow/compute/exprs"
        "github.com/apache/iceberg-go"
@@ -270,10 +272,22 @@ func toDecimalLiteral(typ iceberg.Type, v 
iceberg.DecimalLiteral) expr.Literal {
                precision = dt.Precision()
        }
 
-       byts, _ := v.MarshalBinary()
+       // decimal128.Num is {lo uint64, hi int64} — its raw in-memory bytes on
+       // LE hosts are already the 16-byte little-endian two's-complement
+       // representation Substrait wants. hostIsLE is a build-tag const, so the
+       // compiler drops the unused branch on every known GOARCH.
+       n := v.Value().Val
+       buf := make([]byte, 16)
+       if hostIsLE {
+               copy(buf, unsafe.Slice((*byte)(unsafe.Pointer(&n)), 16))
+       } else {
+               binary.LittleEndian.PutUint64(buf[:8], n.LowBits())
+               binary.LittleEndian.PutUint64(buf[8:], uint64(n.HighBits()))
+       }
+
        result, _ := expr.NewLiteral(&types.Decimal{
                Scale:     int32(v.Scale),
-               Value:     byts,
+               Value:     buf,
                Precision: int32(precision),
        }, false)
 
diff --git a/table/substrait/substrait_decimal_internal_test.go 
b/table/substrait/substrait_decimal_internal_test.go
index 60e9f99e..f0d6edf6 100644
--- a/table/substrait/substrait_decimal_internal_test.go
+++ b/table/substrait/substrait_decimal_internal_test.go
@@ -91,3 +91,40 @@ func TestToDecimalLiteralFallbackWhenTypIsNotDecimal(t 
*testing.T) {
        assert.Equal(t, int32(9), dt.Precision)
        assert.Equal(t, int32(4), dt.Scale)
 }
+
+// TestToDecimalLiteralRealisticValues exercises values whose MarshalBinary
+// output is shorter than 16 bytes. expr.NewLiteral rejects non-16-byte input
+// and DecimalLiteral.MarshalBinary uses the minimum byte representation, so
+// before the BE-min → LE-16 conversion these cases returned nil and panicked
+// in the caller. ValueString uses substrait-go's own LE decoder; matching it
+// against the expected string confirms the bytes are spec-conformant
+// little-endian two's complement.
+func TestToDecimalLiteralRealisticValues(t *testing.T) {
+       cases := []struct {
+               name     string
+               val      decimal128.Num
+               scale    int
+               expected string
+       }{
+               {"positive small 1.0000", decimal128.New(0, 10000), 4, 
"1.0000"},
+               {"positive 12345.67", decimal128.New(0, 1234567), 2, 
"12345.67"},
+               {"zero", decimal128.New(0, 0), 4, "0.0000"},
+               {"negative -1.0000", decimal128.FromI64(-10000), 4, "-1.0000"},
+               {"negative -12345.67", decimal128.FromI64(-1234567), 2, 
"-12345.67"},
+               // 16-byte negative: MarshalBinary returns exactly 16 bytes, so 
the
+               // 0xff sign-extend fill must be fully overwritten by the copy.
+               {"negative -2^120", decimal128.New(-(1 << 56), 0), 0, 
"-1329227995784915872903807060280344576"},
+       }
+
+       for _, tc := range cases {
+               t.Run(tc.name, func(t *testing.T) {
+                       fieldType := iceberg.DecimalTypeOf(18, tc.scale)
+                       lit := iceberg.DecimalLiteral{Val: tc.val, Scale: 
tc.scale}
+
+                       got := toDecimalLiteral(fieldType, lit)
+                       require.NotNil(t, got, "toDecimalLiteral must not 
return nil for sub-16-byte values")
+
+                       assert.Equal(t, tc.expected, got.ValueString())
+               })
+       }
+}

Reply via email to