This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

The following commit(s) were added to refs/heads/main by this push:
     new e0961d5547 Replace lazy_static with OnceLock (#7409)
e0961d5547 is described below

commit e0961d55479aab1c4f92eca817fcce4ec25d7c3e
Author: Kousuke Saruta <saru...@apache.org>
AuthorDate: Sat Aug 26 05:46:11 2023 +0900

    Replace lazy_static with OnceLock (#7409)

    Co-authored-by: Andrew Lamb <and...@nerdnetworks.org>
---
 datafusion-cli/Cargo.lock                          |  3 --
 datafusion/core/Cargo.toml                         |  1 -
 datafusion/expr/Cargo.toml                         |  1 -
 datafusion/expr/src/built_in_function.rs           | 42 ++++++++++--------
 datafusion/physical-expr/Cargo.toml                |  1 -
 datafusion/physical-expr/src/regex_expressions.rs  | 10 ++---
 datafusion/sqllogictest/Cargo.toml                 |  1 -
 .../src/engines/datafusion_engine/normalize.rs     | 51 +++++++++++-----------
 8 files changed, 54 insertions(+), 56 deletions(-)
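Every Rust change in the diff below applies the same mechanical transformation: a lazy_static! block becomes an ordinary accessor function that initializes a function-local static OnceLock via get_or_init and hands out a &'static reference; the Cargo.toml and Cargo.lock hunks then drop the now-unused lazy_static dependency. A minimal, self-contained sketch of that before/after shape (the keywords table here is illustrative only, not code from this repository):

use std::collections::HashMap;
use std::sync::OnceLock;

// Before, with the lazy_static crate:
//
//     lazy_static! {
//         static ref KEYWORDS: HashMap<&'static str, u32> = { /* build map */ };
//     }
//
// After, with std only: an accessor function backed by a OnceLock. The closure
// passed to get_or_init runs at most once; every call returns the same
// &'static reference.
fn keywords() -> &'static HashMap<&'static str, u32> {
    static KEYWORDS_LOCK: OnceLock<HashMap<&'static str, u32>> = OnceLock::new();
    KEYWORDS_LOCK.get_or_init(|| {
        let mut map = HashMap::new();
        map.insert("select", 1);
        map.insert("from", 2);
        map
    })
}

fn main() {
    // Call sites change from KEYWORDS.get(..) to keywords().get(..).
    assert_eq!(keywords().get("select"), Some(&1));
}

Because OnceLock is part of the standard library (stable since Rust 1.70), the same shape covers the lookup tables in built_in_function.rs, the compiled regex in regex_expressions.rs, and the workspace root in normalize.rs without any external crate.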
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 04e802e4ed..3a2f9a7b73 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -1074,7 +1074,6 @@ dependencies = [
  "hashbrown 0.14.0",
  "indexmap 2.0.0",
  "itertools 0.11.0",
- "lazy_static",
  "log",
  "num_cpus",
  "object_store",
@@ -1164,7 +1163,6 @@ dependencies = [
  "ahash",
  "arrow",
  "datafusion-common",
- "lazy_static",
  "sqlparser",
  "strum 0.25.0",
  "strum_macros 0.25.2",
@@ -1206,7 +1204,6 @@ dependencies = [
  "hex",
  "indexmap 2.0.0",
  "itertools 0.11.0",
- "lazy_static",
  "libc",
  "log",
  "md-5",
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index b28ab05cb1..bfa00b2a9f 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -75,7 +75,6 @@ half = { version = "2.1", default-features = false }
 hashbrown = { version = "0.14", features = ["raw"] }
 indexmap = "2.0.0"
 itertools = "0.11"
-lazy_static = { version = "^1.4.0" }
 log = "^0.4"
 num-traits = { version = "0.2", optional = true }
 num_cpus = "1.13.0"
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 9634f23860..e46b66361d 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -38,7 +38,6 @@ path = "src/lib.rs"
 ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
 arrow = { workspace = true }
 datafusion-common = { path = "../common", version = "30.0.0" }
-lazy_static = { version = "^1.4.0" }
 sqlparser = { workspace = true }
 strum = { version = "0.25.0", features = ["derive"] }
 strum_macros = "0.25.0"
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index de046cde89..0fb23be5af 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -28,12 +28,10 @@ use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
 use std::collections::HashMap;
 use std::fmt;
 use std::str::FromStr;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 use strum::IntoEnumIterator;
 use strum_macros::EnumIter;
 
-use lazy_static::lazy_static;
-
 /// Enum of all built-in scalar functions
 // Contributor's guide for adding new scalar functions
 // https://arrow.apache.org/datafusion/contributor-guide/index.html#how-to-add-a-new-scalar-function
@@ -282,26 +280,34 @@ pub enum BuiltinScalarFunction {
     ArrowTypeof,
 }
 
-lazy_static! {
-    /// Maps the sql function name to `BuiltinScalarFunction`
-    static ref NAME_TO_FUNCTION: HashMap<&'static str, BuiltinScalarFunction> = {
-        let mut map: HashMap<&'static str, BuiltinScalarFunction> = HashMap::new();
+/// Maps the sql function name to `BuiltinScalarFunction`
+fn name_to_function() -> &'static HashMap<&'static str, BuiltinScalarFunction> {
+    static NAME_TO_FUNCTION_LOCK: OnceLock<HashMap<&'static str, BuiltinScalarFunction>> =
+        OnceLock::new();
+    NAME_TO_FUNCTION_LOCK.get_or_init(|| {
+        let mut map = HashMap::new();
         BuiltinScalarFunction::iter().for_each(|func| {
             let a = aliases(&func);
-            a.iter().for_each(|a| {map.insert(a, func);});
+            a.iter().for_each(|&a| {
+                map.insert(a, func);
+            });
         });
         map
-    };
+    })
+}
 
-    /// Maps `BuiltinScalarFunction` --> canonical sql function
-    /// First alias in the array is used to display function names
-    static ref FUNCTION_TO_NAME: HashMap<BuiltinScalarFunction, &'static str> = {
-        let mut map: HashMap<BuiltinScalarFunction, &'static str> = HashMap::new();
+/// Maps `BuiltinScalarFunction` --> canonical sql function
+/// First alias in the array is used to display function names
+fn function_to_name() -> &'static HashMap<BuiltinScalarFunction, &'static str> {
+    static FUNCTION_TO_NAME_LOCK: OnceLock<HashMap<BuiltinScalarFunction, &'static str>> =
+        OnceLock::new();
+    FUNCTION_TO_NAME_LOCK.get_or_init(|| {
+        let mut map = HashMap::new();
         BuiltinScalarFunction::iter().for_each(|func| {
-            map.insert(func, aliases(&func).first().unwrap_or(&"NO_ALIAS"));
+            map.insert(func, *aliases(&func).first().unwrap_or(&"NO_ALIAS"));
         });
         map
-    };
+    })
 }
 
 impl BuiltinScalarFunction {
@@ -1379,14 +1385,14 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
 impl fmt::Display for BuiltinScalarFunction {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         // .unwrap is safe here because compiler makes sure the map will have matches for each BuiltinScalarFunction
-        write!(f, "{}", FUNCTION_TO_NAME.get(self).unwrap())
+        write!(f, "{}", function_to_name().get(self).unwrap())
     }
 }
 
 impl FromStr for BuiltinScalarFunction {
     type Err = DataFusionError;
     fn from_str(name: &str) -> Result<BuiltinScalarFunction> {
-        if let Some(func) = NAME_TO_FUNCTION.get(name) {
+        if let Some(func) = name_to_function().get(name) {
             Ok(*func)
         } else {
             plan_err!("There is no built-in function named {name}")
@@ -1453,7 +1459,7 @@ mod tests {
     // and then back to a variant. The test asserts that the original variant and
     // the reconstructed variant are the same.
     fn test_display_and_from_str() {
-        for (_, func_original) in NAME_TO_FUNCTION.iter() {
+        for (_, func_original) in name_to_function().iter() {
             let func_name = func_original.to_string();
             let func_from_str = BuiltinScalarFunction::from_str(&func_name).unwrap();
             assert_eq!(func_from_str, *func_original);
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index d529b6762e..dbaacb4dba 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -59,7 +59,6 @@ hashbrown = { version = "0.14", features = ["raw"] }
 hex = { version = "0.4", optional = true }
 indexmap = "2.0.0"
 itertools = { version = "0.11", features = ["use_std"] }
-lazy_static = { version = "^1.4.0" }
 libc = "0.2.140"
 log = "^0.4"
 md-5 = { version = "^0.10.0", optional = true }
diff --git a/datafusion/physical-expr/src/regex_expressions.rs b/datafusion/physical-expr/src/regex_expressions.rs
index f213336602..5aea70f75a 100644
--- a/datafusion/physical-expr/src/regex_expressions.rs
+++ b/datafusion/physical-expr/src/regex_expressions.rs
@@ -32,9 +32,8 @@ use datafusion_common::{
 };
 use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation};
 use hashbrown::HashMap;
-use lazy_static::lazy_static;
 use regex::Regex;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use crate::functions::{make_scalar_function, make_scalar_function_with_hints, Hint};
 
@@ -82,10 +81,11 @@ pub fn regexp_match<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 /// replace POSIX capture groups (like \1) with Rust Regex group (like ${1})
 /// used by regexp_replace
 fn regex_replace_posix_groups(replacement: &str) -> String {
-    lazy_static! {
-        static ref CAPTURE_GROUPS_RE: Regex = Regex::new(r"(\\)(\d*)").unwrap();
+    fn capture_groups_re() -> &'static Regex {
+        static CAPTURE_GROUPS_RE_LOCK: OnceLock<Regex> = OnceLock::new();
+        CAPTURE_GROUPS_RE_LOCK.get_or_init(|| Regex::new(r"(\\)(\d*)").unwrap())
     }
-    CAPTURE_GROUPS_RE
+    capture_groups_re()
         .replace_all(replacement, "$${$2}")
         .into_owned()
 }
diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml
index 15a23a92dd..0b4a0ef419 100644
--- a/datafusion/sqllogictest/Cargo.toml
+++ b/datafusion/sqllogictest/Cargo.toml
@@ -38,7 +38,6 @@ datafusion = {path = "../core", version = "30.0.0"}
 datafusion-common = {path = "../common", version = "30.0.0"}
 half = "2.2.1"
 itertools = "0.11"
-lazy_static = {version = "^1.4.0"}
 object_store = "0.7.0"
 rust_decimal = {version = "1.27.0"}
 log = "^0.4"
diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
index 942ba8e05b..c0db111bc6 100644
--- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
+++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
@@ -20,8 +20,8 @@ use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBat
 use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;
 use datafusion_common::DFField;
 use datafusion_common::DataFusionError;
-use lazy_static::lazy_static;
 use std::path::PathBuf;
+use std::sync::OnceLock;
 
 use crate::engines::output::DFColumnType;
 
@@ -126,7 +126,7 @@ fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
 /// ```
 fn normalize_paths(mut row: Vec<String>) -> Vec<String> {
     row.iter_mut().for_each(|s| {
-        let workspace_root: &str = WORKSPACE_ROOT.as_ref();
+        let workspace_root: &str = workspace_root().as_ref();
         if s.contains(workspace_root) {
             *s = s.replace(workspace_root, "WORKSPACE_ROOT");
         }
@@ -135,33 +135,32 @@ fn normalize_paths(mut row: Vec<String>) -> Vec<String> {
 }
 
 /// return the location of the datafusion checkout
-fn workspace_root() -> object_store::path::Path {
-    // e.g. /Software/arrow-datafusion/datafusion/core
-    let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+fn workspace_root() -> &'static object_store::path::Path {
+    static WORKSPACE_ROOT_LOCK: OnceLock<object_store::path::Path> = OnceLock::new();
+    WORKSPACE_ROOT_LOCK.get_or_init(|| {
+        // e.g. /Software/arrow-datafusion/datafusion/core
+        let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
 
-    // e.g. /Software/arrow-datafusion/datafusion
-    let workspace_root = dir
-        .parent()
-        .expect("Can not find parent of datafusion/core")
-        // e.g. /Software/arrow-datafusion
-        .parent()
-        .expect("parent of datafusion")
-        .to_string_lossy();
+        // e.g. /Software/arrow-datafusion/datafusion
+        let workspace_root = dir
+            .parent()
+            .expect("Can not find parent of datafusion/core")
+            // e.g. /Software/arrow-datafusion
+            .parent()
+            .expect("parent of datafusion")
+            .to_string_lossy();
 
-    let sanitized_workplace_root = if cfg!(windows) {
-        // Object store paths are delimited with `/`, e.g. `D:/a/arrow-datafusion/arrow-datafusion/testing/data/csv/aggregate_test_100.csv`.
-        // The default windows delimiter is `\`, so the workplace path is `D:\a\arrow-datafusion\arrow-datafusion`.
-        workspace_root.replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
-    } else {
-        workspace_root.to_string()
-    };
-
-    object_store::path::Path::parse(sanitized_workplace_root).unwrap()
-}
+        let sanitized_workplace_root = if cfg!(windows) {
+            // Object store paths are delimited with `/`, e.g. `D:/a/arrow-datafusion/arrow-datafusion/testing/data/csv/aggregate_test_100.csv`.
+            // The default windows delimiter is `\`, so the workplace path is `D:\a\arrow-datafusion\arrow-datafusion`.
+            workspace_root
+                .replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
+        } else {
+            workspace_root.to_string()
+        };
 
-// holds the root directory
-lazy_static! {
-    static ref WORKSPACE_ROOT: object_store::path::Path = workspace_root();
+        object_store::path::Path::parse(sanitized_workplace_root).unwrap()
+    })
 }
 
 /// Convert a single batch to a `Vec<Vec<String>>` for comparison
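The accessor-function shape also gives one-time, thread-safe initialization for values that are expensive to build, such as the compiled regex and the workspace-root path above: get_or_init runs its closure at most once, and every later call returns the same &'static value. A small, std-only sketch of that behaviour (expensive_value is a stand-in for illustration, not code from this commit):

use std::sync::OnceLock;

// Mirrors the shape used above for capture_groups_re() and workspace_root():
// the initializer runs at most once, even if several threads race to call
// this function, and every caller sees the same 'static value.
fn expensive_value() -> &'static String {
    static VALUE_LOCK: OnceLock<String> = OnceLock::new();
    VALUE_LOCK.get_or_init(|| "computed once".to_string())
}

fn main() {
    let a = expensive_value();
    let b = expensive_value();
    // Same allocation both times; the initializer did not rerun.
    assert!(std::ptr::eq(a, b));
    assert_eq!(a, "computed once");
}

Dropping lazy_static from the four Cargo.toml files above therefore changes no runtime behaviour; the macro-generated lazy statics are simply replaced by a standard-library primitive with the same lazy, call-once semantics.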