This is an automated email from the ASF dual-hosted git repository.

laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 1f8b81ff fix(substrait): use bound field precision in toDecimalLiteral 
(#1028) (#1121)
1f8b81ff is described below

commit 1f8b81ffd005ba260604847a68c688bc5e93c811
Author: Masa <[email protected]>
AuthorDate: Tue May 26 18:03:34 2026 +1000

    fix(substrait): use bound field precision in toDecimalLiteral (#1028) 
(#1121)
    
    DecimalLiteral.Type() hardcodes precision 9 because DecimalLiteral does
    not carry the originating column's declared precision. The only
    confirmed consumer that mis-uses this is toDecimalLiteral in
    table/substrait/substrait.go, which embedded the wrong precision in
    emitted Substrait literals for any column with precision != 9.
    
    Per the Option B consensus on #1028:
    
    1. toDecimalLiteral now reads precision from the bound field's
    iceberg.DecimalType rather than v.Type().
    2. Replaces the latent panic v.Type().(*iceberg.DecimalType) with the
    correct value-type assertion v.Type().(iceberg.DecimalType);
    DecimalTypeOf returns by value.
    3. Documents the precision caveat on DecimalLiteral.Type() so future
    callers consult the bound field type for real precision.
    4. Adds whitebox regression tests for the bound-field path and the
    defensive fallback path.
    
    Fixes #1028
    
    Signed-off-by: mzzz-zzm <[email protected]>
---
 literals.go                                        |  6 ++
 table/substrait/substrait.go                       | 17 +++-
 table/substrait/substrait_decimal_internal_test.go | 93 ++++++++++++++++++++++
 3 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/literals.go b/literals.go
index a658e39d..3d920a54 100644
--- a/literals.go
+++ b/literals.go
@@ -1221,6 +1221,12 @@ func (DecimalLiteral) Comparator() Comparator[Decimal] {
                return v1.Val.Cmp(rescaled)
        }
 }
+
+// Type returns a DecimalType built from the literal's scale and a hardcoded
+// precision of 9. The precision is NOT the originating column's declared
+// precision; DecimalLiteral does not carry precision. Callers that need the
+// real column precision must consult the bound field's type rather than
+// lit.Type(). See https://github.com/apache/iceberg-go/issues/1028.
 func (d DecimalLiteral) Type() Type     { return DecimalTypeOf(9, d.Scale) }
 func (d DecimalLiteral) Value() Decimal { return Decimal(d) }
 func (d DecimalLiteral) Any() any       { return d.Value() }
diff --git a/table/substrait/substrait.go b/table/substrait/substrait.go
index 2f486d35..6876893e 100644
--- a/table/substrait/substrait.go
+++ b/table/substrait/substrait.go
@@ -258,12 +258,23 @@ func toByteSliceSubstraitLiteral[T []byte | types.UUID](v 
T) expr.Literal {
        return expr.NewByteSliceLiteral(v, false)
 }
 
-func toDecimalLiteral(v iceberg.DecimalLiteral) expr.Literal {
+// toDecimalLiteral converts an iceberg.DecimalLiteral into a Substrait decimal
+// literal. Precision is taken from the bound field's iceberg.DecimalType 
(typ),
+// NOT from v.Type(). DecimalLiteral does not carry the originating column's
+// precision, so v.Type() always reports a hardcoded precision of 9 (see
+// https://github.com/apache/iceberg-go/issues/1028). If typ is not a
+// DecimalType, this falls back to v.Type()'s (hardcoded) precision.
+func toDecimalLiteral(typ iceberg.Type, v iceberg.DecimalLiteral) expr.Literal 
{
+       precision := v.Type().(iceberg.DecimalType).Precision()
+       if dt, ok := typ.(iceberg.DecimalType); ok {
+               precision = dt.Precision()
+       }
+
        byts, _ := v.MarshalBinary()
        result, _ := expr.NewLiteral(&types.Decimal{
                Scale:     int32(v.Scale),
                Value:     byts,
-               Precision: int32(v.Type().(*iceberg.DecimalType).Precision()),
+               Precision: int32(precision),
        }, false)
 
        return result
@@ -304,7 +315,7 @@ func toSubstraitLiteral(typ iceberg.Type, lit 
iceberg.Literal) expr.Literal {
        case iceberg.UUIDLiteral:
                return toByteSliceSubstraitLiteral(types.UUID(lit[:]))
        case iceberg.DecimalLiteral:
-               return toDecimalLiteral(lit)
+               return toDecimalLiteral(typ, lit)
        }
        panic(fmt.Errorf("invalid literal type: %s", lit.Type()))
 }
diff --git a/table/substrait/substrait_decimal_internal_test.go 
b/table/substrait/substrait_decimal_internal_test.go
new file mode 100644
index 00000000..60e9f99e
--- /dev/null
+++ b/table/substrait/substrait_decimal_internal_test.go
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package substrait
+
+import (
+       "testing"
+
+       "github.com/apache/arrow-go/v18/arrow/decimal128"
+       "github.com/apache/iceberg-go"
+       "github.com/stretchr/testify/assert"
+       "github.com/stretchr/testify/require"
+       "github.com/substrait-io/substrait-go/v8/types"
+)
+
+// decimalLiteral16Bytes builds a DecimalLiteral whose MarshalBinary output is
+// exactly 16 bytes. expr.NewLiteral (substrait-go) rejects decimal values that
+// are not 16 bytes long; DecimalLiteral.MarshalBinary uses the minimum byte
+// representation, so we choose an unscaled value with bit length in [121,128]
+// to force a 16-byte big-endian encoding.
+func decimalLiteral16Bytes(scale int) iceberg.DecimalLiteral {
+       // 2^120: BigInt.BitLen() == 121, requires 16 bytes.
+       val := decimal128.New(1<<56, 0)
+
+       return iceberg.DecimalLiteral{Val: val, Scale: scale}
+}
+
+// TestToDecimalLiteralUsesBoundFieldPrecision pins the invariant that the
+// emitted Substrait decimal precision comes from the bound field's
+// iceberg.DecimalType, not from the literal. DecimalLiteral does not carry
+// the originating column's declared precision, so v.Type() reports a
+// hardcoded precision of 9; sourcing precision from v.Type() turns every
+// subcase below red. See https://github.com/apache/iceberg-go/issues/1028.
+func TestToDecimalLiteralUsesBoundFieldPrecision(t *testing.T) {
+       cases := []struct {
+               name      string
+               precision int
+               scale     int
+       }{
+               {"decimal(38,4)", 38, 4},
+               {"decimal(38,10)", 38, 10},
+               {"decimal(38,2)", 38, 2},
+       }
+
+       for _, tc := range cases {
+               t.Run(tc.name, func(t *testing.T) {
+                       fieldType := iceberg.DecimalTypeOf(tc.precision, 
tc.scale)
+                       lit := decimalLiteral16Bytes(tc.scale)
+
+                       got := toDecimalLiteral(fieldType, lit)
+                       require.NotNil(t, got, "toDecimalLiteral must not 
silently return nil")
+
+                       dt, ok := got.GetType().(*types.DecimalType)
+                       require.Truef(t, ok, "expected *types.DecimalType, got 
%T", got.GetType())
+
+                       assert.Equalf(t, int32(tc.precision), dt.Precision,
+                               "emitted precision must equal bound field 
precision; got decimal<%d,%d>",
+                               dt.Precision, dt.Scale)
+                       assert.Equalf(t, int32(tc.scale), dt.Scale,
+                               "emitted scale must equal literal scale; got 
decimal<%d,%d>",
+                               dt.Precision, dt.Scale)
+               })
+       }
+}
+
+// TestToDecimalLiteralFallbackWhenTypIsNotDecimal pins the defensive
+// fallback: when typ is not a DecimalType, the function uses the literal's
+// own (hardcoded-9) precision and must not panic.
+func TestToDecimalLiteralFallbackWhenTypIsNotDecimal(t *testing.T) {
+       lit := decimalLiteral16Bytes(4)
+
+       got := toDecimalLiteral(iceberg.PrimitiveTypes.String, lit)
+       require.NotNil(t, got)
+
+       dt, ok := got.GetType().(*types.DecimalType)
+       require.Truef(t, ok, "expected *types.DecimalType, got %T", 
got.GetType())
+       assert.Equal(t, int32(9), dt.Precision)
+       assert.Equal(t, int32(4), dt.Scale)
+}

Reply via email to