This is an automated email from the ASF dual-hosted git repository.

xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opendal.git


The following commit(s) were added to refs/heads/main by this push:
     new 4d3cb2a4d feat(hugginface): allow specifying huggingface endpoint (#6801)
4d3cb2a4d is described below

commit 4d3cb2a4d65c832031a8662884e6056e7d9bbb36
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Wed Nov 19 13:11:06 2025 +0100

    feat(hugginface): allow specifying huggingface endpoint (#6801)
    
    * feat(services): allow configuring Huggingface endpoint
    
    * chore(hugginface): fix hf_resolve urls
    
    * fix(huggingface): percent encode revision
    
    * fix: percent encode slash in revision
    
    * style: run cargo fmt
---
 core/src/services/huggingface/backend.rs | 30 ++++++++++++++++++++----
 core/src/services/huggingface/config.rs  |  4 ++++
 core/src/services/huggingface/core.rs    | 39 ++++++++++++++++++++++----------
 3 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/core/src/services/huggingface/backend.rs b/core/src/services/huggingface/backend.rs
index c5b5e4c8b..96ab07163 100644
--- a/core/src/services/huggingface/backend.rs
+++ b/core/src/services/huggingface/backend.rs
@@ -106,6 +106,17 @@ impl HuggingfaceBuilder {
         }
         self
     }
+
+    /// configure the Hub base url. You might want to set this variable if your
+    /// organization is using a Private Hub https://huggingface.co/enterprise
+    ///
+    /// Default is "https://huggingface.co"
+    pub fn endpoint(mut self, endpoint: &str) -> Self {
+        if !endpoint.is_empty() {
+            self.config.endpoint = Some(endpoint.to_string());
+        }
+        self
+    }
 }
 
 impl Builder for HuggingfaceBuilder {
@@ -151,6 +162,20 @@ impl Builder for HuggingfaceBuilder {
 
         let token = self.config.token.as_ref().cloned();
 
+        let endpoint = match &self.config.endpoint {
+            Some(endpoint) => endpoint.clone(),
+            None => {
+                // Try to read from HF_ENDPOINT env var which is used
+                // by the official huggingface clients.
+                if let Ok(env_endpoint) = std::env::var("HF_ENDPOINT") {
+                    env_endpoint
+                } else {
+                    "https://huggingface.co".to_string()
+                }
+            }
+        };
+        debug!("backend use endpoint: {}", &endpoint);
+
         Ok(HuggingfaceBackend {
             core: Arc::new(HuggingfaceCore {
                 info: {
@@ -158,14 +183,10 @@ impl Builder for HuggingfaceBuilder {
                     am.set_scheme(HUGGINGFACE_SCHEME)
                         .set_native_capability(Capability {
                             stat: true,
-
                             read: true,
-
                             list: true,
                             list_with_recursive: true,
-
                             shared: true,
-
                             ..Default::default()
                         });
                     am.into()
@@ -175,6 +196,7 @@ impl Builder for HuggingfaceBuilder {
                 revision,
                 root,
                 token,
+                endpoint,
             }),
         })
     }
diff --git a/core/src/services/huggingface/config.rs b/core/src/services/huggingface/config.rs
index 096490560..6b4034bfb 100644
--- a/core/src/services/huggingface/config.rs
+++ b/core/src/services/huggingface/config.rs
@@ -50,6 +50,10 @@ pub struct HuggingfaceConfig {
     ///
     /// This is optional.
     pub token: Option<String>,
+    /// Endpoint of the Huggingface Hub.
+    ///
+    /// Default is "https://huggingface.co".
+    pub endpoint: Option<String>,
 }
 
 impl Debug for HuggingfaceConfig {
diff --git a/core/src/services/huggingface/core.rs b/core/src/services/huggingface/core.rs
index 5f3a65dff..e852d0a6c 100644
--- a/core/src/services/huggingface/core.rs
+++ b/core/src/services/huggingface/core.rs
@@ -22,12 +22,17 @@ use bytes::Bytes;
 use http::Request;
 use http::Response;
 use http::header;
+use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode};
 use serde::Deserialize;
 
 use super::backend::RepoType;
 use crate::raw::*;
 use crate::*;
 
+fn percent_encode_revision(revision: &str) -> String {
+    utf8_percent_encode(revision, NON_ALPHANUMERIC).to_string()
+}
+
 pub struct HuggingfaceCore {
     pub info: Arc<AccessorInfo>,
 
@@ -36,6 +41,7 @@ pub struct HuggingfaceCore {
     pub revision: String,
     pub root: String,
     pub token: Option<String>,
+    pub endpoint: String,
 }
 
 impl Debug for HuggingfaceCore {
@@ -45,6 +51,7 @@ impl Debug for HuggingfaceCore {
             .field("repo_id", &self.repo_id)
             .field("revision", &self.revision)
             .field("root", &self.root)
+            .field("endpoint", &self.endpoint)
             .finish_non_exhaustive()
     }
 }
@@ -57,12 +64,16 @@ impl HuggingfaceCore {
 
         let url = match self.repo_type {
             RepoType::Model => format!(
-                "https://huggingface.co/api/models/{}/paths-info/{}",
-                &self.repo_id, &self.revision
+                "{}/api/models/{}/paths-info/{}",
+                &self.endpoint,
+                &self.repo_id,
+                percent_encode_revision(&self.revision)
             ),
             RepoType::Dataset => format!(
-                "https://huggingface.co/api/datasets/{}/paths-info/{}",
-                &self.repo_id, &self.revision
+                "{}/api/datasets/{}/paths-info/{}",
+                &self.endpoint,
+                &self.repo_id,
+                percent_encode_revision(&self.revision)
             ),
         };
 
@@ -92,15 +103,17 @@ impl HuggingfaceCore {
 
         let mut url = match self.repo_type {
             RepoType::Model => format!(
-                "https://huggingface.co/api/models/{}/tree/{}/{}?expand=True",
+                "{}/api/models/{}/tree/{}/{}?expand=True",
+                &self.endpoint,
                 &self.repo_id,
-                &self.revision,
+                percent_encode_revision(&self.revision),
                 percent_encode_path(&p)
             ),
             RepoType::Dataset => format!(
-                "https://huggingface.co/api/datasets/{}/tree/{}/{}?expand=True",
+                "{}/api/datasets/{}/tree/{}/{}?expand=True",
+                &self.endpoint,
                 &self.repo_id,
-                &self.revision,
+                percent_encode_revision(&self.revision),
                 percent_encode_path(&p)
             ),
         };
@@ -134,15 +147,17 @@ impl HuggingfaceCore {
 
         let url = match self.repo_type {
             RepoType::Model => format!(
-                "https://huggingface.co/{}/resolve/{}/{}",
+                "{}/{}/resolve/{}/{}",
+                &self.endpoint,
                 &self.repo_id,
-                &self.revision,
+                percent_encode_revision(&self.revision),
                 percent_encode_path(&p)
             ),
             RepoType::Dataset => format!(
-                "https://huggingface.co/datasets/{}/resolve/{}/{}",
+                "{}/datasets/{}/resolve/{}/{}",
+                &self.endpoint,
                 &self.repo_id,
-                &self.revision,
+                percent_encode_revision(&self.revision),
                 percent_encode_path(&p)
             ),
         };

Reply via email to