This is an automated email from the ASF dual-hosted git repository.
laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 1f8b81ff fix(substrait): use bound field precision in toDecimalLiteral
(#1028) (#1121)
1f8b81ff is described below
commit 1f8b81ffd005ba260604847a68c688bc5e93c811
Author: Masa <[email protected]>
AuthorDate: Tue May 26 18:03:34 2026 +1000
fix(substrait): use bound field precision in toDecimalLiteral (#1028)
(#1121)
DecimalLiteral.Type() hardcodes precision 9 because DecimalLiteral does
not carry the originating column's declared precision. The only
confirmed consumer that mis-uses this is toDecimalLiteral in
table/substrait/substrait.go, which embedded the wrong precision in
emitted Substrait literals for any column with precision != 9.
Per the Option B consensus on #1028:
1. toDecimalLiteral now reads precision from the bound field's
iceberg.DecimalType rather than v.Type().
2. Replaces the latent panic v.Type().(*iceberg.DecimalType) with the
correct value-type assertion v.Type().(iceberg.DecimalType);
DecimalTypeOf returns by value.
3. Documents the precision caveat on DecimalLiteral.Type() so future
callers consult the bound field type for real precision.
4. Adds whitebox regression tests for the bound-field path and the
defensive fallback path.
Fixes #1028
Signed-off-by: mzzz-zzm <[email protected]>
---
literals.go | 6 ++
table/substrait/substrait.go | 17 +++-
table/substrait/substrait_decimal_internal_test.go | 93 ++++++++++++++++++++++
3 files changed, 113 insertions(+), 3 deletions(-)
diff --git a/literals.go b/literals.go
index a658e39d..3d920a54 100644
--- a/literals.go
+++ b/literals.go
@@ -1221,6 +1221,12 @@ func (DecimalLiteral) Comparator() Comparator[Decimal] {
return v1.Val.Cmp(rescaled)
}
}
+
+// Type returns a DecimalType built from the literal's scale and a hardcoded
+// precision of 9. The precision is NOT the originating column's declared
+// precision; DecimalLiteral does not carry precision. Callers that need the
+// real column precision must consult the bound field's type rather than
+// lit.Type(). See https://github.com/apache/iceberg-go/issues/1028.
func (d DecimalLiteral) Type() Type { return DecimalTypeOf(9, d.Scale) }
func (d DecimalLiteral) Value() Decimal { return Decimal(d) }
func (d DecimalLiteral) Any() any { return d.Value() }
diff --git a/table/substrait/substrait.go b/table/substrait/substrait.go
index 2f486d35..6876893e 100644
--- a/table/substrait/substrait.go
+++ b/table/substrait/substrait.go
@@ -258,12 +258,23 @@ func toByteSliceSubstraitLiteral[T []byte | types.UUID](v
T) expr.Literal {
return expr.NewByteSliceLiteral(v, false)
}
-func toDecimalLiteral(v iceberg.DecimalLiteral) expr.Literal {
+// toDecimalLiteral converts an iceberg.DecimalLiteral into a Substrait decimal
+// literal. Precision is taken from the bound field's iceberg.DecimalType
(typ),
+// NOT from v.Type(). DecimalLiteral does not carry the originating column's
+// precision, so v.Type() always reports a hardcoded precision of 9 (see
+// https://github.com/apache/iceberg-go/issues/1028). If typ is not a
+// DecimalType, this falls back to v.Type()'s (hardcoded) precision.
+func toDecimalLiteral(typ iceberg.Type, v iceberg.DecimalLiteral) expr.Literal
{
+ precision := v.Type().(iceberg.DecimalType).Precision()
+ if dt, ok := typ.(iceberg.DecimalType); ok {
+ precision = dt.Precision()
+ }
+
byts, _ := v.MarshalBinary()
result, _ := expr.NewLiteral(&types.Decimal{
Scale: int32(v.Scale),
Value: byts,
- Precision: int32(v.Type().(*iceberg.DecimalType).Precision()),
+ Precision: int32(precision),
}, false)
return result
@@ -304,7 +315,7 @@ func toSubstraitLiteral(typ iceberg.Type, lit
iceberg.Literal) expr.Literal {
case iceberg.UUIDLiteral:
return toByteSliceSubstraitLiteral(types.UUID(lit[:]))
case iceberg.DecimalLiteral:
- return toDecimalLiteral(lit)
+ return toDecimalLiteral(typ, lit)
}
panic(fmt.Errorf("invalid literal type: %s", lit.Type()))
}
diff --git a/table/substrait/substrait_decimal_internal_test.go
b/table/substrait/substrait_decimal_internal_test.go
new file mode 100644
index 00000000..60e9f99e
--- /dev/null
+++ b/table/substrait/substrait_decimal_internal_test.go
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package substrait
+
+import (
+ "testing"
+
+ "github.com/apache/arrow-go/v18/arrow/decimal128"
+ "github.com/apache/iceberg-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "github.com/substrait-io/substrait-go/v8/types"
+)
+
+// decimalLiteral16Bytes builds a DecimalLiteral whose MarshalBinary output is
+// exactly 16 bytes. expr.NewLiteral (substrait-go) rejects decimal values that
+// are not 16 bytes long; DecimalLiteral.MarshalBinary uses the minimum byte
+// representation, so we choose an unscaled value with bit length in [121,128]
+// to force a 16-byte big-endian encoding.
+func decimalLiteral16Bytes(scale int) iceberg.DecimalLiteral {
+ // 2^120: BigInt.BitLen() == 121, requires 16 bytes.
+ val := decimal128.New(1<<56, 0)
+
+ return iceberg.DecimalLiteral{Val: val, Scale: scale}
+}
+
+// TestToDecimalLiteralUsesBoundFieldPrecision pins the invariant that the
+// emitted Substrait decimal precision comes from the bound field's
+// iceberg.DecimalType, not from the literal. DecimalLiteral does not carry
+// the originating column's declared precision, so v.Type() reports a
+// hardcoded precision of 9; sourcing precision from v.Type() turns every
+// subcase below red. See https://github.com/apache/iceberg-go/issues/1028.
+func TestToDecimalLiteralUsesBoundFieldPrecision(t *testing.T) {
+ cases := []struct {
+ name string
+ precision int
+ scale int
+ }{
+ {"decimal(38,4)", 38, 4},
+ {"decimal(38,10)", 38, 10},
+ {"decimal(38,2)", 38, 2},
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ fieldType := iceberg.DecimalTypeOf(tc.precision,
tc.scale)
+ lit := decimalLiteral16Bytes(tc.scale)
+
+ got := toDecimalLiteral(fieldType, lit)
+ require.NotNil(t, got, "toDecimalLiteral must not
silently return nil")
+
+ dt, ok := got.GetType().(*types.DecimalType)
+ require.Truef(t, ok, "expected *types.DecimalType, got
%T", got.GetType())
+
+ assert.Equalf(t, int32(tc.precision), dt.Precision,
+ "emitted precision must equal bound field
precision; got decimal<%d,%d>",
+ dt.Precision, dt.Scale)
+ assert.Equalf(t, int32(tc.scale), dt.Scale,
+ "emitted scale must equal literal scale; got
decimal<%d,%d>",
+ dt.Precision, dt.Scale)
+ })
+ }
+}
+
+// TestToDecimalLiteralFallbackWhenTypIsNotDecimal pins the defensive
+// fallback: when typ is not a DecimalType, the function uses the literal's
+// own (hardcoded-9) precision and must not panic.
+func TestToDecimalLiteralFallbackWhenTypIsNotDecimal(t *testing.T) {
+ lit := decimalLiteral16Bytes(4)
+
+ got := toDecimalLiteral(iceberg.PrimitiveTypes.String, lit)
+ require.NotNil(t, got)
+
+ dt, ok := got.GetType().(*types.DecimalType)
+ require.Truef(t, ok, "expected *types.DecimalType, got %T",
got.GetType())
+ assert.Equal(t, int32(9), dt.Precision)
+ assert.Equal(t, int32(4), dt.Scale)
+}