This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e0961d5547 Replace lazy_static with OnceLock (#7409)
e0961d5547 is described below

commit e0961d55479aab1c4f92eca817fcce4ec25d7c3e
Author: Kousuke Saruta <saru...@apache.org>
AuthorDate: Sat Aug 26 05:46:11 2023 +0900

    Replace lazy_static with OnceLock (#7409)
    
    Co-authored-by: Andrew Lamb <and...@nerdnetworks.org>
---
 datafusion-cli/Cargo.lock                          |  3 --
 datafusion/core/Cargo.toml                         |  1 -
 datafusion/expr/Cargo.toml                         |  1 -
 datafusion/expr/src/built_in_function.rs           | 42 ++++++++++--------
 datafusion/physical-expr/Cargo.toml                |  1 -
 datafusion/physical-expr/src/regex_expressions.rs  | 10 ++---
 datafusion/sqllogictest/Cargo.toml                 |  1 -
 .../src/engines/datafusion_engine/normalize.rs     | 51 +++++++++++-----------
 8 files changed, 54 insertions(+), 56 deletions(-)

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 04e802e4ed..3a2f9a7b73 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -1074,7 +1074,6 @@ dependencies = [
  "hashbrown 0.14.0",
  "indexmap 2.0.0",
  "itertools 0.11.0",
- "lazy_static",
  "log",
  "num_cpus",
  "object_store",
@@ -1164,7 +1163,6 @@ dependencies = [
  "ahash",
  "arrow",
  "datafusion-common",
- "lazy_static",
  "sqlparser",
  "strum 0.25.0",
  "strum_macros 0.25.2",
@@ -1206,7 +1204,6 @@ dependencies = [
  "hex",
  "indexmap 2.0.0",
  "itertools 0.11.0",
- "lazy_static",
  "libc",
  "log",
  "md-5",
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index b28ab05cb1..bfa00b2a9f 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -75,7 +75,6 @@ half = { version = "2.1", default-features = false }
 hashbrown = { version = "0.14", features = ["raw"] }
 indexmap = "2.0.0"
 itertools = "0.11"
-lazy_static = { version = "^1.4.0" }
 log = "^0.4"
 num-traits = { version = "0.2", optional = true }
 num_cpus = "1.13.0"
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index 9634f23860..e46b66361d 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -38,7 +38,6 @@ path = "src/lib.rs"
 ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
 arrow = { workspace = true }
 datafusion-common = { path = "../common", version = "30.0.0" }
-lazy_static = { version = "^1.4.0" }
 sqlparser = { workspace = true }
 strum = { version = "0.25.0", features = ["derive"] }
 strum_macros = "0.25.0"
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index de046cde89..0fb23be5af 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -28,12 +28,10 @@ use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
 use std::collections::HashMap;
 use std::fmt;
 use std::str::FromStr;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 use strum::IntoEnumIterator;
 use strum_macros::EnumIter;
 
-use lazy_static::lazy_static;
-
 /// Enum of all built-in scalar functions
 // Contributor's guide for adding new scalar functions
 // https://arrow.apache.org/datafusion/contributor-guide/index.html#how-to-add-a-new-scalar-function
@@ -282,26 +280,34 @@ pub enum BuiltinScalarFunction {
     ArrowTypeof,
 }
 
-lazy_static! {
-    /// Maps the sql function name to `BuiltinScalarFunction`
-    static ref NAME_TO_FUNCTION: HashMap<&'static str, BuiltinScalarFunction> = {
-        let mut map: HashMap<&'static str, BuiltinScalarFunction> = HashMap::new();
+/// Maps the sql function name to `BuiltinScalarFunction`
+fn name_to_function() -> &'static HashMap<&'static str, BuiltinScalarFunction> {
+    static NAME_TO_FUNCTION_LOCK: OnceLock<HashMap<&'static str, BuiltinScalarFunction>> =
+        OnceLock::new();
+    NAME_TO_FUNCTION_LOCK.get_or_init(|| {
+        let mut map = HashMap::new();
         BuiltinScalarFunction::iter().for_each(|func| {
             let a = aliases(&func);
-            a.iter().for_each(|a| {map.insert(a, func);});
+            a.iter().for_each(|&a| {
+                map.insert(a, func);
+            });
         });
         map
-    };
+    })
+}
 
-    /// Maps `BuiltinScalarFunction` --> canonical sql function
-    /// First alias in the array is used to display function names
-    static ref FUNCTION_TO_NAME: HashMap<BuiltinScalarFunction, &'static str> = {
-        let mut map: HashMap<BuiltinScalarFunction, &'static str> = HashMap::new();
+/// Maps `BuiltinScalarFunction` --> canonical sql function
+/// First alias in the array is used to display function names
+fn function_to_name() -> &'static HashMap<BuiltinScalarFunction, &'static str> {
+    static FUNCTION_TO_NAME_LOCK: OnceLock<HashMap<BuiltinScalarFunction, &'static str>> =
+        OnceLock::new();
+    FUNCTION_TO_NAME_LOCK.get_or_init(|| {
+        let mut map = HashMap::new();
         BuiltinScalarFunction::iter().for_each(|func| {
-            map.insert(func, aliases(&func).first().unwrap_or(&"NO_ALIAS"));
+            map.insert(func, *aliases(&func).first().unwrap_or(&"NO_ALIAS"));
         });
         map
-    };
+    })
 }
 
 impl BuiltinScalarFunction {
@@ -1379,14 +1385,14 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
 impl fmt::Display for BuiltinScalarFunction {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         // .unwrap is safe here because compiler makes sure the map will have matches for each BuiltinScalarFunction
-        write!(f, "{}", FUNCTION_TO_NAME.get(self).unwrap())
+        write!(f, "{}", function_to_name().get(self).unwrap())
     }
 }
 
 impl FromStr for BuiltinScalarFunction {
     type Err = DataFusionError;
     fn from_str(name: &str) -> Result<BuiltinScalarFunction> {
-        if let Some(func) = NAME_TO_FUNCTION.get(name) {
+        if let Some(func) = name_to_function().get(name) {
             Ok(*func)
         } else {
             plan_err!("There is no built-in function named {name}")
@@ -1453,7 +1459,7 @@ mod tests {
     // and then back to a variant. The test asserts that the original variant and
     // the reconstructed variant are the same.
     fn test_display_and_from_str() {
-        for (_, func_original) in NAME_TO_FUNCTION.iter() {
+        for (_, func_original) in name_to_function().iter() {
             let func_name = func_original.to_string();
             let func_from_str = BuiltinScalarFunction::from_str(&func_name).unwrap();
             assert_eq!(func_from_str, *func_original);
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index d529b6762e..dbaacb4dba 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -59,7 +59,6 @@ hashbrown = { version = "0.14", features = ["raw"] }
 hex = { version = "0.4", optional = true }
 indexmap = "2.0.0"
 itertools = { version = "0.11", features = ["use_std"] }
-lazy_static = { version = "^1.4.0" }
 libc = "0.2.140"
 log = "^0.4"
 md-5 = { version = "^0.10.0", optional = true }
diff --git a/datafusion/physical-expr/src/regex_expressions.rs b/datafusion/physical-expr/src/regex_expressions.rs
index f213336602..5aea70f75a 100644
--- a/datafusion/physical-expr/src/regex_expressions.rs
+++ b/datafusion/physical-expr/src/regex_expressions.rs
@@ -32,9 +32,8 @@ use datafusion_common::{
 };
 use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation};
 use hashbrown::HashMap;
-use lazy_static::lazy_static;
 use regex::Regex;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use crate::functions::{make_scalar_function, make_scalar_function_with_hints, Hint};
 
@@ -82,10 +81,11 @@ pub fn regexp_match<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
 /// replace POSIX capture groups (like \1) with Rust Regex group (like ${1})
 /// used by regexp_replace
 fn regex_replace_posix_groups(replacement: &str) -> String {
-    lazy_static! {
-        static ref CAPTURE_GROUPS_RE: Regex = Regex::new(r"(\\)(\d*)").unwrap();
+    fn capture_groups_re() -> &'static Regex {
+        static CAPTURE_GROUPS_RE_LOCK: OnceLock<Regex> = OnceLock::new();
+        CAPTURE_GROUPS_RE_LOCK.get_or_init(|| Regex::new(r"(\\)(\d*)").unwrap())
     }
-    CAPTURE_GROUPS_RE
+    capture_groups_re()
         .replace_all(replacement, "$${$2}")
         .into_owned()
 }
diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml
index 15a23a92dd..0b4a0ef419 100644
--- a/datafusion/sqllogictest/Cargo.toml
+++ b/datafusion/sqllogictest/Cargo.toml
@@ -38,7 +38,6 @@ datafusion = {path = "../core", version = "30.0.0"}
 datafusion-common = {path = "../common", version = "30.0.0"}
 half = "2.2.1"
 itertools = "0.11"
-lazy_static = {version = "^1.4.0"}
 object_store = "0.7.0"
 rust_decimal = {version = "1.27.0"}
 log = "^0.4"
diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
index 942ba8e05b..c0db111bc6 100644
--- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
+++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
@@ -20,8 +20,8 @@ use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBat
 use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;
 use datafusion_common::DFField;
 use datafusion_common::DataFusionError;
-use lazy_static::lazy_static;
 use std::path::PathBuf;
+use std::sync::OnceLock;
 
 use crate::engines::output::DFColumnType;
 
@@ -126,7 +126,7 @@ fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
 /// ```
 fn normalize_paths(mut row: Vec<String>) -> Vec<String> {
     row.iter_mut().for_each(|s| {
-        let workspace_root: &str = WORKSPACE_ROOT.as_ref();
+        let workspace_root: &str = workspace_root().as_ref();
         if s.contains(workspace_root) {
             *s = s.replace(workspace_root, "WORKSPACE_ROOT");
         }
@@ -135,33 +135,32 @@ fn normalize_paths(mut row: Vec<String>) -> Vec<String> {
 }
 
 /// return the location of the datafusion checkout
-fn workspace_root() -> object_store::path::Path {
-    // e.g. /Software/arrow-datafusion/datafusion/core
-    let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+fn workspace_root() -> &'static object_store::path::Path {
+    static WORKSPACE_ROOT_LOCK: OnceLock<object_store::path::Path> = OnceLock::new();
+    WORKSPACE_ROOT_LOCK.get_or_init(|| {
+        // e.g. /Software/arrow-datafusion/datafusion/core
+        let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
 
-    // e.g. /Software/arrow-datafusion/datafusion
-    let workspace_root = dir
-        .parent()
-        .expect("Can not find parent of datafusion/core")
-        // e.g. /Software/arrow-datafusion
-        .parent()
-        .expect("parent of datafusion")
-        .to_string_lossy();
+        // e.g. /Software/arrow-datafusion/datafusion
+        let workspace_root = dir
+            .parent()
+            .expect("Can not find parent of datafusion/core")
+            // e.g. /Software/arrow-datafusion
+            .parent()
+            .expect("parent of datafusion")
+            .to_string_lossy();
 
-    let sanitized_workplace_root = if cfg!(windows) {
-        // Object store paths are delimited with `/`, e.g. `D:/a/arrow-datafusion/arrow-datafusion/testing/data/csv/aggregate_test_100.csv`.
-        // The default windows delimiter is `\`, so the workplace path is `D:\a\arrow-datafusion\arrow-datafusion`.
-        workspace_root.replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
-    } else {
-        workspace_root.to_string()
-    };
-
-    object_store::path::Path::parse(sanitized_workplace_root).unwrap()
-}
+        let sanitized_workplace_root = if cfg!(windows) {
+            // Object store paths are delimited with `/`, e.g. `D:/a/arrow-datafusion/arrow-datafusion/testing/data/csv/aggregate_test_100.csv`.
+            // The default windows delimiter is `\`, so the workplace path is `D:\a\arrow-datafusion\arrow-datafusion`.
+            workspace_root
+                .replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
+        } else {
+            workspace_root.to_string()
+        };
 
-// holds the root directory
-lazy_static! {
-    static ref WORKSPACE_ROOT: object_store::path::Path = workspace_root();
+        object_store::path::Path::parse(sanitized_workplace_root).unwrap()
+    })
 }
 
 /// Convert a single batch to a `Vec<Vec<String>>` for comparison

Reply via email to