This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 05a2ae6e build: upgrade rust dev toolchain to 1.94 (#595)
05a2ae6e is described below
commit 05a2ae6e77ded948574960bc8ad4d3403046395c
Author: Shiyan Xu <[email protected]>
AuthorDate: Mon May 4 18:25:22 2026 -0500
build: upgrade rust dev toolchain to 1.94 (#595)
---
.github/workflows/release.yml | 4 --
benchmark/tpch/src/main.rs | 10 ++--
crates/core/src/config/read_options.rs | 10 ++--
crates/core/src/config/table.rs | 9 +---
crates/core/src/file_group/builder.rs | 14 ++---
crates/core/src/file_group/log_file/scanner.rs | 8 +--
crates/core/src/hfile/reader.rs | 74 +++++++++++++-------------
crates/core/src/metadata/merger.rs | 30 +++++------
crates/core/src/metadata/table/records.rs | 10 ++--
crates/core/src/schema/resolver.rs | 8 +--
crates/core/src/table/builder.rs | 9 ++--
crates/core/src/table/partition.rs | 2 +-
crates/core/src/timeline/selector.rs | 24 ++++-----
crates/datafusion/src/util/expr.rs | 2 +-
demo/infra/runner/Dockerfile | 2 +-
rust-toolchain.toml | 2 +-
16 files changed, 103 insertions(+), 115 deletions(-)
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index cb423138..a45de8f5 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -51,10 +51,6 @@ jobs:
steps:
- uses: actions/checkout@v6
- - run: |
- rustup toolchain add --profile=minimal stable
- rustup override set stable
-
- name: cargo publish
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
diff --git a/benchmark/tpch/src/main.rs b/benchmark/tpch/src/main.rs
index fed3cc97..73362a41 100644
--- a/benchmark/tpch/src/main.rs
+++ b/benchmark/tpch/src/main.rs
@@ -668,7 +668,7 @@ fn compute_stats(timings: &[f64]) -> Option<TimingStats> {
let min = sorted[0];
let max = sorted[sorted.len() - 1];
let mean = sorted.iter().sum::<f64>() / sorted.len() as f64;
- let median = if sorted.len() % 2 == 0 {
+ let median = if sorted.len().is_multiple_of(2) {
let mid = sorted.len() / 2;
(sorted[mid - 1] + sorted[mid]) / 2.0
} else {
@@ -915,10 +915,10 @@ fn parse_spark_timings(reader: Box<dyn BufRead>) -> Vec<QueryResult> {
let mut all_timings: BTreeMap<usize, Vec<f64>> = BTreeMap::new();
for line in reader.lines().map_while(|l| l.ok()) {
- if let Ok(v) = serde_json::from_str::<serde_json::Value>(&line) {
- if let (Some(q), Some(ms)) = (v["query"].as_u64(), v["elapsed_ms"].as_f64()) {
- all_timings.entry(q as usize).or_default().push(ms);
- }
+ if let Ok(v) = serde_json::from_str::<serde_json::Value>(&line)
+ && let (Some(q), Some(ms)) = (v["query"].as_u64(), v["elapsed_ms"].as_f64())
+ {
+ all_timings.entry(q as usize).or_default().push(ms);
}
}
diff --git a/crates/core/src/config/read_options.rs b/crates/core/src/config/read_options.rs
index 6b5acf30..2cec1214 100644
--- a/crates/core/src/config/read_options.rs
+++ b/crates/core/src/config/read_options.rs
@@ -286,11 +286,11 @@ impl ReadOptions {
let mut resolved = self.clone();
for key in HudiReadConfig::iter() {
let key_str = key.key_str();
- if !resolved.hudi_options.contains_key(key_str) {
- if let Some(val) = configs.try_get(key)? {
- let s: String = val.into();
- resolved.hudi_options.insert(key_str.to_string(), s);
- }
+ if !resolved.hudi_options.contains_key(key_str)
+ && let Some(val) = configs.try_get(key)?
+ {
+ let s: String = val.into();
+ resolved.hudi_options.insert(key_str.to_string(), s);
}
}
Ok(resolved)
diff --git a/crates/core/src/config/table.rs b/crates/core/src/config/table.rs
index c385e97d..bbe32ab7 100644
--- a/crates/core/src/config/table.rs
+++ b/crates/core/src/config/table.rs
@@ -374,20 +374,15 @@ impl FromStr for BaseFileFormatValue {
}
/// Config value for [HudiTableConfig::TimelineTimezone].
-#[derive(Clone, Debug, PartialEq, AsRefStr)]
+#[derive(Clone, Debug, PartialEq, AsRefStr, Default)]
pub enum TimelineTimezoneValue {
#[strum(serialize = "utc")]
+ #[default]
UTC,
#[strum(serialize = "local")]
Local,
}
-impl Default for TimelineTimezoneValue {
- fn default() -> Self {
- Self::UTC
- }
-}
-
impl FromStr for TimelineTimezoneValue {
type Err = ConfigError;
diff --git a/crates/core/src/file_group/builder.rs b/crates/core/src/file_group/builder.rs
index 71c79c7c..6fa4693f 100644
--- a/crates/core/src/file_group/builder.rs
+++ b/crates/core/src/file_group/builder.rs
@@ -145,13 +145,13 @@ pub(crate) fn file_groups_from_commit_metadata_with_estimator<V: CompletionTimeV
file_group.add_base_file(base_file)?;
// Log files are only present in MOR write stats (the `baseFile` branch).
- if write_stat.base_file.is_some() {
- if let Some(log_file_names) = &write_stat.log_files {
- for log_file_name in log_file_names {
- let mut log_file = LogFile::from_str(log_file_name)?;
- log_file.set_completion_time(completion_time_view);
- file_group.add_log_file(log_file)?;
- }
+ if write_stat.base_file.is_some()
+ && let Some(log_file_names) = &write_stat.log_files
+ {
+ for log_file_name in log_file_names {
+ let mut log_file = LogFile::from_str(log_file_name)?;
+ log_file.set_completion_time(completion_time_view);
+ file_group.add_log_file(log_file)?;
}
}
diff --git a/crates/core/src/file_group/log_file/scanner.rs b/crates/core/src/file_group/log_file/scanner.rs
index b911ffd2..2be70190 100644
--- a/crates/core/src/file_group/log_file/scanner.rs
+++ b/crates/core/src/file_group/log_file/scanner.rs
@@ -252,10 +252,10 @@ impl LogFileScanner {
let mut total_records = 0;
for blocks in &collected.all_blocks {
for block in blocks {
- if block.block_type == BlockType::HfileData {
- if let Some(records) = block.content.as_hfile_records() {
- total_records += records.len();
- }
+ if block.block_type == BlockType::HfileData
+ && let Some(records) = block.content.as_hfile_records()
+ {
+ total_records += records.len();
}
}
}
diff --git a/crates/core/src/hfile/reader.rs b/crates/core/src/hfile/reader.rs
index e6d8879f..e027f801 100644
--- a/crates/core/src/hfile/reader.rs
+++ b/crates/core/src/hfile/reader.rs
@@ -190,32 +190,31 @@ impl HFileReader {
/// Check if the file uses MVCC timestamps (not supported).
fn check_mvcc_support(&self) -> Result<()> {
- if let Some(version_bytes) = self.file_info.get(FILE_INFO_KEY_VALUE_VERSION) {
- if version_bytes.len() >= 4 {
- let version = i32::from_be_bytes([
- version_bytes[0],
- version_bytes[1],
- version_bytes[2],
- version_bytes[3],
+ if let Some(version_bytes) = self.file_info.get(FILE_INFO_KEY_VALUE_VERSION)
+ && version_bytes.len() >= 4
+ {
+ let version = i32::from_be_bytes([
+ version_bytes[0],
+ version_bytes[1],
+ version_bytes[2],
+ version_bytes[3],
+ ]);
+ if version == KEY_VALUE_VERSION_WITH_MVCC_TS
+ && let Some(ts_bytes) = self.file_info.get(FILE_INFO_MAX_MVCC_TS)
+ && ts_bytes.len() >= 8
+ {
+ let max_ts = i64::from_be_bytes([
+ ts_bytes[0],
+ ts_bytes[1],
+ ts_bytes[2],
+ ts_bytes[3],
+ ts_bytes[4],
+ ts_bytes[5],
+ ts_bytes[6],
+ ts_bytes[7],
]);
- if version == KEY_VALUE_VERSION_WITH_MVCC_TS {
- if let Some(ts_bytes) = self.file_info.get(FILE_INFO_MAX_MVCC_TS) {
- if ts_bytes.len() >= 8 {
- let max_ts = i64::from_be_bytes([
- ts_bytes[0],
- ts_bytes[1],
- ts_bytes[2],
- ts_bytes[3],
- ts_bytes[4],
- ts_bytes[5],
- ts_bytes[6],
- ts_bytes[7],
- ]);
- if max_ts > 0 {
- return Err(HFileError::UnsupportedMvccTimestamp);
- }
- }
- }
+ if max_ts > 0 {
+ return Err(HFileError::UnsupportedMvccTimestamp);
}
}
}
@@ -608,12 +607,11 @@ impl HFileReader {
std::cmp::Ordering::Greater => {
// Current key > lookup key: backward seek
// Check if we're at the first key of a block and lookup >= fake first key
- if let Some(entry) = &self.current_block_entry {
- if self.is_at_first_key_of_block()
- && compare_keys(&entry.first_key, lookup_key) != std::cmp::Ordering::Greater
- {
- return Ok(SeekResult::BeforeBlockFirstKey);
- }
+ if let Some(entry) = &self.current_block_entry
+ && self.is_at_first_key_of_block()
+ && compare_keys(&entry.first_key, lookup_key) != std::cmp::Ordering::Greater
+ {
+ return Ok(SeekResult::BeforeBlockFirstKey);
}
// Check if before file's first key
@@ -643,13 +641,13 @@ impl HFileReader {
}
} else {
// Last block - check against last key
- if let Some(last_key) = &self.last_key {
- if compare_keys(last_key, lookup_key) == std::cmp::Ordering::Less {
- self.cursor.eof = true;
- self.current_block = None;
- self.current_block_entry = None;
- return Ok(SeekResult::Eof);
- }
+ if let Some(last_key) = &self.last_key
+ && compare_keys(last_key, lookup_key) == std::cmp::Ordering::Less
+ {
+ self.cursor.eof = true;
+ self.current_block = None;
+ self.current_block_entry = None;
+ return Ok(SeekResult::Eof);
}
}
}
diff --git a/crates/core/src/metadata/merger.rs b/crates/core/src/metadata/merger.rs
index ff1ba812..56b8f189 100644
--- a/crates/core/src/metadata/merger.rs
+++ b/crates/core/src/metadata/merger.rs
@@ -134,26 +134,26 @@ impl FilesPartitionMerger {
// Process base records, filtering by key
for record in base_records {
- if let Some(key_str) = record.key_as_str() {
- if key_set.contains(key_str) {
- let decoded = self.decode_record(record)?;
- merged.insert(decoded.key.clone(), decoded);
- }
+ if let Some(key_str) = record.key_as_str()
+ && key_set.contains(key_str)
+ {
+ let decoded = self.decode_record(record)?;
+ merged.insert(decoded.key.clone(), decoded);
}
}
// Process log records, filtering by key
for record in log_records {
- if let Some(key_str) = record.key_as_str() {
- if key_set.contains(key_str) {
- let decoded = self.decode_record(record)?;
- match merged.get_mut(&decoded.key) {
- Some(existing) => {
- self.merge_files_partition_records(existing, &decoded);
- }
- None => {
- merged.insert(decoded.key.clone(), decoded);
- }
+ if let Some(key_str) = record.key_as_str()
+ && key_set.contains(key_str)
+ {
+ let decoded = self.decode_record(record)?;
+ match merged.get_mut(&decoded.key) {
+ Some(existing) => {
+ self.merge_files_partition_records(existing, &decoded);
+ }
+ None => {
+ merged.insert(decoded.key.clone(), decoded);
}
}
}
diff --git a/crates/core/src/metadata/table/records.rs b/crates/core/src/metadata/table/records.rs
index 68d55d68..798b4cb6 100644
--- a/crates/core/src/metadata/table/records.rs
+++ b/crates/core/src/metadata/table/records.rs
@@ -287,11 +287,11 @@ pub fn decode_files_partition_record_with_schema(
let mut files = extract_filesystem_metadata(&avro_value);
// Normalize "." -> "" in AllPartitions files map
- if record_type == MetadataRecordType::AllPartitions {
- if let Some(mut file_info) = files.remove(FilesPartitionRecord::NON_PARTITIONED_NAME) {
- file_info.name = String::new();
- files.insert(String::new(), file_info);
- }
+ if record_type == MetadataRecordType::AllPartitions
+ && let Some(mut file_info) = files.remove(FilesPartitionRecord::NON_PARTITIONED_NAME)
+ {
+ file_info.name = String::new();
+ files.insert(String::new(), file_info);
}
Ok(FilesPartitionRecord {
diff --git a/crates/core/src/schema/resolver.rs b/crates/core/src/schema/resolver.rs
index c2dc3a35..e0d02b9f 100644
--- a/crates/core/src/schema/resolver.rs
+++ b/crates/core/src/schema/resolver.rs
@@ -141,10 +141,10 @@ async fn resolve_schema_from_base_file(
})?;
// Try to get the base file path from either 'path' or 'baseFile' field
- if let Some(path) = &first_stat.path {
- if path.ends_with(".parquet") {
- return Ok(storage.get_file_schema(path).await?);
- }
+ if let Some(path) = &first_stat.path
+ && path.ends_with(".parquet")
+ {
+ return Ok(storage.get_file_schema(path).await?);
}
// Handle deltacommit case with baseFile
diff --git a/crates/core/src/table/builder.rs b/crates/core/src/table/builder.rs
index 5e161d79..8fdfd90c 100644
--- a/crates/core/src/table/builder.rs
+++ b/crates/core/src/table/builder.rs
@@ -277,12 +277,11 @@ impl OptionResolver {
if let Ok(bytes) = storage
.get_file_data_from_absolute_path(global_config_path.to_str().unwrap())
.await
+ && let Ok(global_configs) = parse_data_for_options(&bytes, " \t=")
{
- if let Ok(global_configs) = parse_data_for_options(&bytes, " \t=") {
- for (key, value) in global_configs {
- if key.starts_with("hoodie.") && !options.contains_key(&key) {
- options.insert(key.to_string(), value.to_string());
- }
+ for (key, value) in global_configs {
+ if key.starts_with("hoodie.") && !options.contains_key(&key) {
+ options.insert(key.to_string(), value.to_string());
}
}
}
diff --git a/crates/core/src/table/partition.rs b/crates/core/src/table/partition.rs
index 684c9f3f..6a4a1f75 100644
--- a/crates/core/src/table/partition.rs
+++ b/crates/core/src/table/partition.rs
@@ -632,7 +632,7 @@ mod tests {
};
let transformed = PartitionPruner::transform_filters_for_keygen(
- &[user_filter.clone()],
+ std::slice::from_ref(&user_filter),
&partition_schema,
&configs,
)
diff --git a/crates/core/src/timeline/selector.rs b/crates/core/src/timeline/selector.rs
index 79f1601d..7fed06fb 100644
--- a/crates/core/src/timeline/selector.rs
+++ b/crates/core/src/timeline/selector.rs
@@ -291,20 +291,20 @@ impl TimelineSelector {
};
let dt = Instant::parse_datetime(timestamp, &self.timezone)?;
- if let Some(start) = self.start_datetime {
- if dt < start {
- return Err(CoreError::Timeline(format!(
- "Instant not created for due to timestamp before start datetime: {file_name}"
- )));
- }
+ if let Some(start) = self.start_datetime
+ && dt < start
+ {
+ return Err(CoreError::Timeline(format!(
+ "Instant not created for due to timestamp before start datetime: {file_name}"
+ )));
}
- if let Some(end) = self.end_datetime {
- if dt >= end {
- return Err(CoreError::Timeline(format!(
- "Instant not created for due to timestamp after or at end datetime: {file_name}"
- )));
- }
+ if let Some(end) = self.end_datetime
+ && dt >= end
+ {
+ return Err(CoreError::Timeline(format!(
+ "Instant not created for due to timestamp after or at end datetime: {file_name}"
+ )));
}
Ok(Instant {
diff --git a/crates/datafusion/src/util/expr.rs b/crates/datafusion/src/util/expr.rs
index 36261c97..306d5e45 100644
--- a/crates/datafusion/src/util/expr.rs
+++ b/crates/datafusion/src/util/expr.rs
@@ -295,7 +295,7 @@ mod tests {
#[test]
fn test_convert_binary_expr_extensive() {
// list of test cases with different operators and data types
- let test_cases = vec![
+ let test_cases = [
(
col("int32_col").eq(lit(42i32)),
Some(HudiFilter {
diff --git a/demo/infra/runner/Dockerfile b/demo/infra/runner/Dockerfile
index c649feef..ba4a0687 100644
--- a/demo/infra/runner/Dockerfile
+++ b/demo/infra/runner/Dockerfile
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-FROM rust:1.87
+FROM rust:1.94
RUN apt-get update && apt-get install -y cmake curl ca-certificates
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index 728e598b..f4d46b40 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -16,6 +16,6 @@
# under the License.
[toolchain]
-channel = "1.88"
+channel = "1.94"
components = ["rustfmt", "clippy"]
profile = "minimal"