Copilot commented on code in PR #265:
URL: https://github.com/apache/fluss-rust/pull/265#discussion_r2776723783
##########
bindings/cpp/src/types.rs:
##########
@@ -68,6 +78,7 @@ fn ffi_data_type_to_core(dt: i32) ->
Result<fcore::metadata::DataType> {
DATA_TYPE_TIME => Ok(fcore::metadata::DataTypes::time()),
DATA_TYPE_TIMESTAMP => Ok(fcore::metadata::DataTypes::timestamp()),
DATA_TYPE_TIMESTAMP_LTZ =>
Ok(fcore::metadata::DataTypes::timestamp_ltz()),
+ DATA_TYPE_DECIMAL => Ok(fcore::metadata::DataTypes::decimal(precision,
scale)),
Review Comment:
`DataTypes::decimal(precision, scale)` panics on invalid parameters (it
calls `DecimalType::new(...).expect(...)`). Since precision/scale come from FFI
input, this should not be able to panic. Consider validating precision/scale
before calling this, or constructing via `DecimalType::new(...)?` and mapping
the error into the `Result` returned by `ffi_data_type_to_core`.
```suggestion
DATA_TYPE_DECIMAL => {
if precision == 0 {
return Err(anyhow!(
"Invalid decimal type from FFI: precision must be > 0,
got {precision}"
));
}
if scale > precision {
return Err(anyhow!(
"Invalid decimal type from FFI: scale ({scale}) must be
<= precision ({precision})"
));
}
Ok(fcore::metadata::DataTypes::decimal(precision, scale))
}
```
##########
bindings/cpp/src/types.rs:
##########
@@ -97,7 +109,7 @@ pub fn ffi_descriptor_to_core(
let mut schema_builder = fcore::metadata::Schema::builder();
for col in &descriptor.schema.columns {
- let dt = ffi_data_type_to_core(col.data_type)?;
+ let dt = ffi_data_type_to_core(col.data_type, col.precision as u32,
col.scale as u32)?;
Review Comment:
`col.precision`/`col.scale` are `i32` and are cast to `u32` here. If a
caller passes negative values, this will wrap to a huge `u32` and then
`DataTypes::decimal(...)` will panic internally (it uses `expect("Invalid
decimal parameters")`), which would abort across the CXX FFI boundary. Please
validate precision/scale are non-negative and within the supported bounds (p in
1..=38, s in 0..=p) and return a normal `Err` instead of panicking.
```suggestion
let dt = if col.data_type == DATA_TYPE_DECIMAL {
let precision = col.precision;
let scale = col.scale;
// Validate decimal precision/scale to avoid panics in decimal
constructor
if precision < 1 || precision > 38 {
return Err(anyhow!(
"Invalid decimal precision {} for column '{}'; expected
1..=38",
precision,
col.name
));
}
if scale < 0 || scale > precision {
return Err(anyhow!(
"Invalid decimal scale {} for column '{}' with precision
{}; expected 0..=precision",
scale,
col.name,
precision
));
}
ffi_data_type_to_core(col.data_type, precision as u32, scale as
u32)?
} else {
ffi_data_type_to_core(col.data_type, col.precision as u32,
col.scale as u32)?
};
```
##########
bindings/cpp/src/types.rs:
##########
@@ -224,12 +263,23 @@ pub fn ffi_row_to_core(row: &ffi::FfiGenericRow) ->
fcore::row::GenericRow<'_> {
DATUM_TYPE_FLOAT64 => Datum::Float64(field.f64_val.into()),
DATUM_TYPE_STRING =>
Datum::String(Cow::Borrowed(field.string_val.as_str())),
DATUM_TYPE_BYTES =>
Datum::Blob(Cow::Borrowed(field.bytes_val.as_slice())),
- _ => Datum::Null,
+ DATUM_TYPE_DECIMAL_STRING => {
+ let (precision, scale) = get_decimal_type(idx, schema)?;
+ let bd =
bigdecimal::BigDecimal::from_str(field.string_val.as_str())
+ .map_err(|e| anyhow!(
+ "Column {idx}: invalid decimal string '{}': {e}",
+ field.string_val
+ ))?;
+ let decimal = fcore::row::Decimal::from_big_decimal(bd,
precision, scale)
+ .map_err(|e| anyhow!("Column {idx}: {e}"))?;
+ Datum::Decimal(decimal)
+ }
Review Comment:
`ffi_row_to_core` only accepts `DATUM_TYPE_DECIMAL_STRING`, but the read
path introduced `DATUM_TYPE_DECIMAL_I64`/`DATUM_TYPE_DECIMAL_I128` and C++
exposes those `DatumType` values. This makes it easy for users to take a
scanned row (which will contain DecimalI64/I128) and append it back, but it
will currently fail with "unknown datum type". Please handle
`DATUM_TYPE_DECIMAL_I64`/`DATUM_TYPE_DECIMAL_I128` here (you already have
`decimal_precision`/`decimal_scale` and the unscaled value in the datum) and
convert them into `Datum::Decimal`.
```suggestion
}
DATUM_TYPE_DECIMAL_I64 => {
let (precision, scale) = get_decimal_type(idx, schema)?;
let unscaled = field.i64_val;
let s = if scale == 0 {
unscaled.to_string()
} else {
let negative = unscaled < 0;
let mut digits = unscaled
.abs()
.to_string();
let scale_usize = scale as usize;
if digits.len() <= scale_usize {
let mut padded = String::with_capacity(scale_usize +
1);
for _ in 0..(scale_usize - digits.len() + 1) {
padded.push('0');
}
padded.push_str(&digits);
digits = padded;
}
let split_pos = digits.len() - scale_usize;
let (int_part, frac_part) = digits.split_at(split_pos);
let mut result = String::with_capacity(digits.len() + 2);
if negative {
result.push('-');
}
result.push_str(int_part);
result.push('.');
result.push_str(frac_part);
result
};
let bd = bigdecimal::BigDecimal::from_str(&s).map_err(|e| {
anyhow!("Column {idx}: invalid decimal I64 value '{}':
{e}", s)
})?;
let decimal = fcore::row::Decimal::from_big_decimal(bd,
precision, scale)
.map_err(|e| anyhow!("Column {idx}: {e}"))?;
Datum::Decimal(decimal)
}
DATUM_TYPE_DECIMAL_I128 => {
let (precision, scale) = get_decimal_type(idx, schema)?;
let unscaled = field.i128_val;
let s = if scale == 0 {
unscaled.to_string()
} else {
let negative = unscaled < 0;
let mut digits = unscaled
.abs()
.to_string();
let scale_usize = scale as usize;
if digits.len() <= scale_usize {
let mut padded = String::with_capacity(scale_usize +
1);
for _ in 0..(scale_usize - digits.len() + 1) {
padded.push('0');
}
padded.push_str(&digits);
digits = padded;
}
let split_pos = digits.len() - scale_usize;
let (int_part, frac_part) = digits.split_at(split_pos);
let mut result = String::with_capacity(digits.len() + 2);
if negative {
result.push('-');
}
result.push_str(int_part);
result.push('.');
result.push_str(frac_part);
result
};
let bd = bigdecimal::BigDecimal::from_str(&s).map_err(|e| {
anyhow!("Column {idx}: invalid decimal I128 value '{}':
{e}", s)
})?;
let decimal = fcore::row::Decimal::from_big_decimal(bd,
precision, scale)
.map_err(|e| anyhow!("Column {idx}: {e}"))?;
Datum::Decimal(decimal)
}
```
##########
bindings/cpp/include/fluss.hpp:
##########
@@ -269,6 +286,70 @@ struct Datum {
d.bytes_val = std::move(v);
return d;
}
+ // Stores the decimal string as-is. Rust side will parse via BigDecimal,
+ // look up (p,s) from the schema, validate, and create the Decimal.
+ static Datum DecimalString(std::string str) {
+ Datum d;
+ d.type = DatumType::DecimalString;
+ d.string_val = std::move(str);
+ return d;
+ }
+
+ bool IsDecimal() const {
+ return type == DatumType::DecimalI64 || type == DatumType::DecimalI128
+ || type == DatumType::DecimalString;
+ }
+
+ std::string DecimalToString() const {
+ if (type == DatumType::DecimalI64) {
+ return FormatUnscaled64(i64_val, decimal_scale);
+ } else if (type == DatumType::DecimalI128) {
+ __int128 val = (static_cast<__int128>(i128_hi) << 64) |
+
static_cast<__int128>(static_cast<uint64_t>(i128_lo));
Review Comment:
In `DecimalToString()`, reconstructing the `__int128` uses
`(static_cast<__int128>(i128_hi) << 64)`, but left-shifting a negative signed
value is undefined behavior in C++. For negative decimals, `i128_hi` will be
negative, so this can miscompile. Reconstruct using an unsigned intermediate
(bitwise composition in `unsigned __int128` from the `uint64_t` bit patterns)
and then cast to `__int128`.
```suggestion
unsigned __int128 uval = (static_cast<unsigned
__int128>(static_cast<uint64_t>(i128_hi)) << 64) |
static_cast<unsigned
__int128>(static_cast<uint64_t>(i128_lo));
__int128 val = static_cast<__int128>(uval);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]