This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs-object-store.git
The following commit(s) were added to refs/heads/main by this push:
new 18ed86c Whitelisting Onelake API & Workspace PL FQDNs (#1) (#711)
18ed86c is described below
commit 18ed86caf5d06aa318f38158501eeb736f28a73b
Author: Smriti Agrawal <[email protected]>
AuthorDate: Fri Jun 12 01:44:14 2026 +0530
Whitelisting Onelake API & Workspace PL FQDNs (#1) (#711)
* Whitelisting Onelake API & Workspace PL FQDNs
* Addressing comments to whitelist api-onelake fqdns and add UTs
* Reverting the mistakenly modified UT
* Adding validation for xy in WS-PL URL & case insensitive regex
* Making regex an optional dependency
* Eliminating the use of regex for ws-pl fqdn matching
* Adding a separate method 'azure_test_workspace_private_link' for ws-pl
tests
* Organising private link UTs
* Adding integration test for WSPL & rebasing with upstream
* Whitelisting Onelake API & Workspace PL FQDNs
* Addressing comments to whitelist api-onelake fqdns and add UTs
* Reverting the mistakenly modified UT
* Adding validation for xy in WS-PL URL & case insensitive regex
* Making regex an optional dependency
* Eliminating the use of regex for ws-pl fqdn matching
* Adding a separate method 'azure_test_workspace_private_link' for ws-pl
tests
* Organising private link UTs
* Fixing cargo fmt
* Fixing Clippy
* Fixing Clippy
* Fixing Clippy
* Modified Integration test to pass workspace & artifact id within URL only
* Fixing Clippy
* Adding WSPL support for ABFSS scheme as well
* Adding comments for readability
* Adding comments for readability
---------
Co-authored-by: Smriti Agrawal <[email protected]>
---
src/azure/builder.rs | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++
src/azure/mod.rs | 41 +++++++++++++++++
2 files changed, 163 insertions(+)
diff --git a/src/azure/builder.rs b/src/azure/builder.rs
index 1f57fac..4ef95cb 100644
--- a/src/azure/builder.rs
+++ b/src/azure/builder.rs
@@ -666,10 +666,26 @@ impl MicrosoftAzureBuilder {
self.container_name = Some(validate(host)?);
} else {
match host.split_once('.') {
+ // Workspace-level Private Link detection
+ //
"{workspaceid}.z??.(onelake|dfs|blob).fabric.microsoft.com"
+ Some((workspaceid, rest))
+ if rest.starts_with('z') &&
rest.ends_with("fabric.microsoft.com") =>
+ {
+ // Account name for WS-PL is two labels:
"{workspaceid}.z{xy}"
+ let (zone, _) =
rest.split_once('.').unwrap_or((rest, ""));
+
+ self.account_name =
Some(format!("{workspaceid}.{zone}"));
+ self.endpoint = Some(format!("https://{}", host));
+
+ self.container_name =
Some(validate(parsed.username())?);
+ self.use_fabric_endpoint = true.into();
+ }
+
Some((a, "dfs.core.windows.net")) | Some((a,
"blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);
self.container_name =
Some(validate(parsed.username())?);
}
+
Some((a, "dfs.fabric.microsoft.com"))
| Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);
@@ -681,6 +697,30 @@ impl MicrosoftAzureBuilder {
}
}
"https" => match host.split_once('.') {
+ // Workspace-level Private Link detection
+ // "{workspaceid}.z??.(onelake|dfs|blob).fabric.microsoft.com"
+ Some((workspaceid, rest))
+ if rest.starts_with('z') &&
rest.ends_with("fabric.microsoft.com") =>
+ {
+ // rest looks like: "z28.dfs.fabric.microsoft.com" /
"z28.blob.fabric.microsoft.com" / etc.
+ // Account name for WS-PL is two labels:
"{workspaceid}.z{xy}"
+ let (zone, _) = rest.split_once('.').unwrap_or((rest, ""));
+
+ self.account_name = Some(format!("{workspaceid}.{zone}"));
+ self.endpoint = Some(format!("https://{}", host));
+
+ // Attempt to infer the container name from the URL
+ let container =
parsed.path_segments().unwrap().next().expect(
+ "iterator always contains at least one string (which
may be empty)",
+ );
+
+ if !container.is_empty() {
+ self.container_name = Some(validate(container)?);
+ }
+
+ self.use_fabric_endpoint = true.into();
+ }
+
Some((a, "dfs.core.windows.net")) | Some((a,
"blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);
let container =
parsed.path_segments().unwrap().next().expect(
@@ -1204,6 +1244,17 @@ mod tests {
assert_eq!(builder.container_name.as_deref(), Some("container"));
assert!(builder.use_fabric_endpoint.get().unwrap());
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder
+
.parse_url("https://onelake.dfs.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456.lakehouse/Files/tables/sales/data.parquet")
+ .unwrap();
+ assert_eq!(builder.account_name, Some("onelake".to_string()));
+ assert_eq!(
+ builder.container_name.as_deref(),
+ Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3")
+ );
+ assert!(builder.use_fabric_endpoint.get().unwrap());
+
let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account.blob.fabric.microsoft.com/")
@@ -1235,6 +1286,77 @@ mod tests {
}
}
+ #[test]
+ fn azure_test_workspace_private_link() {
+ let test_cases: Vec<(&str, &str, Option<&str>)> = vec![
+ (
+
"https://Ab000000000000000000000000000000.zAb.dfs.fabric.microsoft.com/",
+ "ab000000000000000000000000000000.zab",
+ None,
+ ),
+ (
+
"https://ab000000000000000000000000000000.zab.dfs.fabric.microsoft.com/",
+ "ab000000000000000000000000000000.zab",
+ None,
+ ),
+ (
+
"https://c047b3e34e89407a98d7cf9949ae92a3.zc0.blob.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+ "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+ Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+ ),
+ (
+
"https://c047b3e34e89407a98d7cf9949ae92a3.zc0.dfs.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+ "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+ Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+ ),
+ (
+
"https://c047b3e34e89407a98d7cf9949ae92a3.zc0.onelake.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+ "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+ Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+ ),
+ (
+
"https://c047b3e34e89407a98d7cf9949ae92a3.zc0.w.api.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+ "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+ Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+ ),
+ (
+
"https://c047b3e34e89407a98d7cf9949ae92a3.zc0.c.api.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+ "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+ Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+ ),
+ (
+
"abfss://c047b3e34e89407a98d7cf9949ae92a3@c047b3e34e89407a98d7cf9949ae92a3.zc0.dfs.fabric.microsoft.com/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+ "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+ Some("c047b3e34e89407a98d7cf9949ae92a3"),
+ ),
+ (
+
"abfss://c047b3e34e89407a98d7cf9949ae92a3@c047b3e34e89407a98d7cf9949ae92a3.zc0.blob.fabric.microsoft.com/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+ "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+ Some("c047b3e34e89407a98d7cf9949ae92a3"),
+ ),
+ ];
+
+ for (url, expected_account, expected_container) in &test_cases {
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder.parse_url(url).unwrap();
+
+ assert_eq!(
+ builder.account_name.as_deref(),
+ Some(*expected_account),
+ "account mismatch for URL: {url}"
+ );
+ assert_eq!(
+ builder.container_name.as_deref(),
+ *expected_container,
+ "container mismatch for URL: {url}"
+ );
+ assert!(
+ builder.use_fabric_endpoint.get().unwrap(),
+ "use_fabric_endpoint not set for URL: {url}"
+ );
+ }
+ }
+
#[test]
fn azure_test_config_from_map() {
let azure_client_id = "object_store:fake_access_key_id";
diff --git a/src/azure/mod.rs b/src/azure/mod.rs
index 1429bec..e2ed05f 100644
--- a/src/azure/mod.rs
+++ b/src/azure/mod.rs
@@ -378,6 +378,47 @@ mod tests {
}
}
+ #[ignore = "Used for manual testing against a real Workspace Private Link
Endpoint."]
+ #[tokio::test]
+ async fn azure_onelake_wspl_test() {
+ maybe_skip_integration!();
+
+ let url =
+ std::env::var("AZURE_ONELAKE_URL").expect("Set AZURE_ONELAKE_URL
to a WS-PL FQDN");
+ let parsed = url::Url::parse(&url).unwrap();
+
+ let path = match parsed.scheme() {
+ "abfss" | "abfs" => {
+ // abfss://<container>@<host>/<path...>
+ // container is in username, entire path is the object path
+ let segments: Vec<&str> =
parsed.path_segments().unwrap().collect();
+ Path::from(segments.join("/"))
+ }
+ _ => {
+ // https://<host>/<container>/<path...>
+ // first segment is container, rest is the object path
+ let segments: Vec<&str> =
parsed.path_segments().unwrap().collect();
+ Path::from(segments[1..].join("/"))
+ }
+ };
+
+ let store = MicrosoftAzureBuilder::new()
+ .with_url(&url)
+ .with_bearer_token_authorization(
+ std::env::var("AZURE_STORAGE_TOKEN").expect("Set
AZURE_STORAGE_TOKEN"),
+ )
+ .build()
+ .unwrap();
+
+ let data = Bytes::from("Hello OneLake WSPL");
+
+ store.put(&path, data.clone().into()).await.unwrap();
+ let result = store.get(&path).await.unwrap();
+ let loaded = result.bytes().await.unwrap();
+ assert_eq!(data, loaded);
+ store.delete(&path).await.unwrap();
+ }
+
#[ignore = "Used for manual testing against a real storage account."]
#[tokio::test]
async fn test_user_delegation_key() {