This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new bc5a528a Add Entra ID auth manager (#2974)
bc5a528a is described below

commit bc5a528a4e25267f17e16367b2eec976b42517ce
Author: Kevin Liu <[email protected]>
AuthorDate: Wed Jan 28 23:23:28 2026 -0500

    Add Entra ID auth manager (#2974)
    
    <!--
    Thanks for opening a pull request!
    -->
    
    <!-- In the case this PR will resolve an issue, please replace
    ${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
    <!-- Closes #${GITHUB_ISSUE_ID} -->
    Closes #2800
    
    # Rationale for this change
    Adds `EntraAuthManager` for Microsoft Entra ID (Azure AD) authentication
    using `DefaultAzureCredential` from the Azure Identity library.
    
    Supported auth methods
    - Environment variables (`AZURE_TENANT_ID`, `AZURE_CLIENT_ID`,
    `AZURE_CLIENT_SECRET`)
    - Managed Identity (system or user-assigned)
    - Azure CLI (`az login`)
    - Workload Identity (AKS)
    
    ## Are these changes tested?
    Yes. Unit tests were added, and the change was tested locally with OneLake and the Azure CLI.
    
    Preview for docs:
    <img width="872" height="638" alt="Screenshot 2026-01-27 at 5 40 59 PM"
    src="https://github.com/user-attachments/assets/2a9f42ff-ac3d-4c22-b524-1caa7a70227f"
    />
    
    ## Are there any user-facing changes?
    
    <!-- In the case of user-facing changes, please add the changelog label.
    -->
---
 mkdocs/docs/configuration.md    | 28 +++++++++---
 mkdocs/docs/index.md            |  3 ++
 pyiceberg/catalog/rest/auth.py  | 63 ++++++++++++++++++++++++++
 pyproject.toml                  |  1 +
 tests/catalog/test_rest_auth.py | 98 ++++++++++++++++++++++++++++++++++++++++-
 uv.lock                         |  6 ++-
 6 files changed, 192 insertions(+), 7 deletions(-)

diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index eb19e673..391cca78 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -395,6 +395,7 @@ The RESTCatalog supports pluggable authentication via the 
`auth` configuration b
 - `oauth2`: OAuth2 client credentials flow.
 - `custom`: Custom authentication manager (requires `auth.impl`).
 - `google`: Google Authentication support
+- `entra`: Microsoft Entra ID (Azure AD) authentication support
 
 ###### Configuration Properties
 
@@ -422,6 +423,7 @@ catalog:
 | `auth.oauth2`    | If type is `oauth2` | Block containing OAuth2 
configuration (see below).                                 |
 | `auth.custom`    | If type is `custom` | Block containing configuration for 
the custom AuthManager.                          |
 | `auth.google`    | If type is `google` | Block containing `credentials_path` 
to a service account file (if using). Will default to using Application Default 
Credentials. |
+| `auth.entra`     | If type is `entra` | Block containing Entra ID 
configuration. Will default to using DefaultAzureCredential. |
 
 ###### Examples
 
@@ -578,22 +580,38 @@ catalog:
 
 See [OneLake table APIs for Iceberg](https://aka.ms/onelakeircdocs) for 
detailed documentation.
 
+Using Entra ID authentication (recommended):
+
+```yaml
+catalog:
+  onelake_catalog:
+    type: rest
+    uri: https://onelake.table.fabric.microsoft.com/iceberg
+    warehouse: <fabric_workspace_id>/<fabric_data_item_id>
+    auth:
+      type: entra
+    adls.account-name: onelake
+    adls.account-host: onelake.blob.fabric.microsoft.com
+```
+
+Using static token:
+
 ```yaml
 catalog:
   onelake_catalog:
     type: rest
     uri: https://onelake.table.fabric.microsoft.com/iceberg
     warehouse: <fabric_workspace_id>/<fabric_data_item_id> # Example : 
DB0CE1EE-B014-47D3-8F0C-9D64C39C0FC2/F470A1D2-6D6D-4C9D-8796-46286C80B7C0
-    token: <token>,
-    adls.account-name: onelake,
-    adls.account-host: onelake.blob.fabric.microsoft.com,
+    token: <token>
+    adls.account-name: onelake
+    adls.account-host: onelake.blob.fabric.microsoft.com
     adls.credential: <credential>
 ```
 
 <!-- prettier-ignore-start -->
 
-!!! Note "OneLake Authentication Models"
-    For Authentication: You can use DefautlAzureCredential from 
`azure.identity` package or refer to other [authentication 
flows](https://learn.microsoft.com/en-us/entra/identity-platform/authentication-flows-app-scenarios)
 for detailed documentation.
+!!! Note "OneLake Authentication"
+    Use the `entra` auth type for Entra ID (Azure AD) authentication via 
[DefaultAzureCredential](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/credential-chains?tabs=dac#defaultazurecredential-overview),
 which supports environment variables, managed identity, Azure CLI, and more. 
Install with `pip install pyiceberg[entra-auth]`.
 <!-- prettier-ignore-end -->
 
 ### SQL Catalog
diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md
index 86736ec0..a37f3be8 100644
--- a/mkdocs/docs/index.md
+++ b/mkdocs/docs/index.md
@@ -63,6 +63,9 @@ You can mix and match optional dependencies depending on your 
needs:
 | rest-sigv4    | Support for generating AWS SIGv4 authentication headers for 
REST Catalogs |
 | pyiceberg-core | Installs iceberg-rust powered core                          
             |
 | datafusion    | Installs both PyArrow and Apache DataFusion                  
             |
+| hf            | Support for Hugging Face Hub                                 
             |
+| gcp-auth      | Support for Google Cloud authentication                      
             |
+| entra-auth    | Support for Azure Entra authentication                       
             |
 
 You either need to install `s3fs`, `adlfs`, `gcsfs`, or `pyarrow` to be able 
to fetch files from an object store.
 
diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py
index 72235d87..60207428 100644
--- a/pyiceberg/catalog/rest/auth.py
+++ b/pyiceberg/catalog/rest/auth.py
@@ -249,6 +249,68 @@ class GoogleAuthManager(AuthManager):
         return f"Bearer {self.credentials.token}"
 
 
+class EntraAuthManager(AuthManager):
+    """Auth Manager implementation that supports Microsoft Entra ID (Azure AD) 
authentication.
+
+    This manager uses the Azure Identity library's DefaultAzureCredential 
which automatically
+    tries multiple authentication methods including environment variables, 
managed identity,
+    and Azure CLI.
+
+    See 
https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/credential-chains
+    for more details on DefaultAzureCredential.
+    """
+
+    DEFAULT_SCOPE = "https://storage.azure.com/.default"
+
+    def __init__(
+        self,
+        scopes: list[str] | None = None,
+        **credential_kwargs: Any,
+    ):
+        """
+        Initialize EntraAuthManager.
+
+        Args:
+            scopes: List of OAuth2 scopes. Defaults to ["https://storage.azure.com/.default"].
+            **credential_kwargs: Arguments passed to DefaultAzureCredential.
+                Supported authentication methods:
+                - Environment Variables: Set AZURE_TENANT_ID, AZURE_CLIENT_ID, 
AZURE_CLIENT_SECRET
+                - Managed Identity: Works automatically on Azure; for 
user-assigned, pass managed_identity_client_id
+                - Azure CLI: Works automatically if logged in via `az login`
+                - Workload Identity: Works automatically in AKS with workload 
identity configured  # codespell:ignore aks
+        """
+        try:
+            from azure.identity import DefaultAzureCredential
+        except ImportError as e:
+            raise ImportError("Azure Identity library not found. Please 
install with: pip install pyiceberg[entra-auth]") from e
+
+        self._scopes = scopes or [self.DEFAULT_SCOPE]
+        self._lock = threading.Lock()
+        self._token: str | None = None
+        self._expires_at: float = 0
+        self._credential = DefaultAzureCredential(**credential_kwargs)
+
+    def _refresh_token(self) -> None:
+        """Refresh the access token from Azure."""
+        token = self._credential.get_token(*self._scopes)
+        self._token = token.token
+        # expires_on is a Unix timestamp; add a 60-second margin for safety
+        self._expires_at = token.expires_on - 60
+
+    def _get_token(self) -> str:
+        """Get a valid access token, refreshing if necessary."""
+        with self._lock:
+            if not self._token or time.time() >= self._expires_at:
+                self._refresh_token()
+            if self._token is None:
+                raise ValueError("Failed to obtain Entra access token")
+            return self._token
+
+    def auth_header(self) -> str:
+        """Return the Authorization header value with a valid Bearer token."""
+        return f"Bearer {self._get_token()}"
+
+
 class AuthManagerAdapter(AuthBase):
     """A `requests.auth.AuthBase` adapter for integrating an `AuthManager` 
into a `requests.Session`.
 
@@ -330,3 +392,4 @@ AuthManagerFactory.register("basic", BasicAuthManager)
 AuthManagerFactory.register("legacyoauth2", LegacyOAuth2AuthManager)
 AuthManagerFactory.register("oauth2", OAuth2AuthManager)
 AuthManagerFactory.register("google", GoogleAuthManager)
+AuthManagerFactory.register("entra", EntraAuthManager)
diff --git a/pyproject.toml b/pyproject.toml
index 71b5ed28..8afcd7d3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -96,6 +96,7 @@ hf = ["huggingface-hub>=0.24.0"]
 pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.9.0"]
 datafusion = ["datafusion>=51,<52"]
 gcp-auth = ["google-auth>=2.4.0"]
+entra-auth = ["azure-identity>=1.25.1"]
 
 [dependency-groups]
 dev = [
diff --git a/tests/catalog/test_rest_auth.py b/tests/catalog/test_rest_auth.py
index 2ef02ed0..ae5d40f5 100644
--- a/tests/catalog/test_rest_auth.py
+++ b/tests/catalog/test_rest_auth.py
@@ -22,7 +22,7 @@ import pytest
 import requests
 from requests_mock import Mocker
 
-from pyiceberg.catalog.rest.auth import AuthManagerAdapter, BasicAuthManager, 
GoogleAuthManager, NoopAuthManager
+from pyiceberg.catalog.rest.auth import AuthManagerAdapter, BasicAuthManager, 
EntraAuthManager, GoogleAuthManager, NoopAuthManager
 
 TEST_URI = "https://iceberg-test-catalog/"
 GOOGLE_CREDS_URI = "https://oauth2.googleapis.com/token"
@@ -153,3 +153,99 @@ def test_google_auth_manager_import_error() -> None:
     with patch.dict("sys.modules", {"google.auth": None, 
"google.auth.transport.requests": None}):
         with pytest.raises(ImportError, match="Google Auth libraries not 
found. Please install 'google-auth'."):
             GoogleAuthManager()
+
+
+@patch("azure.identity.DefaultAzureCredential")
+def test_entra_auth_manager_default_credential(mock_default_cred: MagicMock, 
rest_mock: Mocker) -> None:
+    """Test EntraAuthManager with DefaultAzureCredential."""
+    mock_credential_instance = MagicMock()
+    mock_token = MagicMock()
+    mock_token.token = "entra_default_token"
+    mock_token.expires_on = 9999999999  # Far future timestamp
+    mock_credential_instance.get_token.return_value = mock_token
+    mock_default_cred.return_value = mock_credential_instance
+
+    auth_manager = EntraAuthManager()
+    session = requests.Session()
+    session.auth = AuthManagerAdapter(auth_manager)
+    session.get(TEST_URI)
+
+    mock_default_cred.assert_called_once_with()
+    mock_credential_instance.get_token.assert_called_once_with("https://storage.azure.com/.default")
+    history = rest_mock.request_history
+    assert len(history) == 1
+    actual_headers = history[0].headers
+    assert actual_headers["Authorization"] == "Bearer entra_default_token"
+
+
+@patch("azure.identity.DefaultAzureCredential")
+def test_entra_auth_manager_with_managed_identity_client_id(mock_default_cred: 
MagicMock, rest_mock: Mocker) -> None:
+    """Test EntraAuthManager with managed_identity_client_id passed to 
DefaultAzureCredential."""
+    mock_credential_instance = MagicMock()
+    mock_token = MagicMock()
+    mock_token.token = "entra_mi_token"
+    mock_token.expires_on = 9999999999
+    mock_credential_instance.get_token.return_value = mock_token
+    mock_default_cred.return_value = mock_credential_instance
+
+    auth_manager = 
EntraAuthManager(managed_identity_client_id="user-assigned-client-id")
+    session = requests.Session()
+    session.auth = AuthManagerAdapter(auth_manager)
+    session.get(TEST_URI)
+
+    
mock_default_cred.assert_called_once_with(managed_identity_client_id="user-assigned-client-id")
+    mock_credential_instance.get_token.assert_called_once_with("https://storage.azure.com/.default")
+    history = rest_mock.request_history
+    assert len(history) == 1
+    actual_headers = history[0].headers
+    assert actual_headers["Authorization"] == "Bearer entra_mi_token"
+
+
+@patch("azure.identity.DefaultAzureCredential")
+def test_entra_auth_manager_custom_scopes(mock_default_cred: MagicMock, 
rest_mock: Mocker) -> None:
+    """Test EntraAuthManager with custom scopes."""
+    mock_credential_instance = MagicMock()
+    mock_token = MagicMock()
+    mock_token.token = "entra_custom_scope_token"
+    mock_token.expires_on = 9999999999
+    mock_credential_instance.get_token.return_value = mock_token
+    mock_default_cred.return_value = mock_credential_instance
+
+    custom_scopes = ["https://datalake.azure.net/.default", "https://storage.azure.com/.default"]
+    auth_manager = EntraAuthManager(scopes=custom_scopes)
+    session = requests.Session()
+    session.auth = AuthManagerAdapter(auth_manager)
+    session.get(TEST_URI)
+
+    mock_default_cred.assert_called_once_with()
+    mock_credential_instance.get_token.assert_called_once_with(*custom_scopes)
+    history = rest_mock.request_history
+    assert len(history) == 1
+    actual_headers = history[0].headers
+    assert actual_headers["Authorization"] == "Bearer entra_custom_scope_token"
+
+
+def test_entra_auth_manager_import_error() -> None:
+    """Test EntraAuthManager raises ImportError if azure-identity is not 
installed."""
+    with patch.dict("sys.modules", {"azure.identity": None}):
+        with pytest.raises(ImportError, match="Azure Identity library not 
found"):
+            EntraAuthManager()
+
+
+@patch("azure.identity.DefaultAzureCredential")
+def test_entra_auth_manager_token_failure(mock_default_cred: MagicMock, 
rest_mock: Mocker) -> None:
+    """Test EntraAuthManager raises exception when token acquisition fails."""
+    mock_credential_instance = MagicMock()
+    mock_credential_instance.get_token.side_effect = Exception("Failed to 
acquire token")
+    mock_default_cred.return_value = mock_credential_instance
+
+    auth_manager = EntraAuthManager()
+    session = requests.Session()
+    session.auth = AuthManagerAdapter(auth_manager)
+
+    with pytest.raises(Exception, match="Failed to acquire token"):
+        session.get(TEST_URI)
+
+    # Verify no requests were made with a blank/missing auth header
+    history = rest_mock.request_history
+    assert len(history) == 0
diff --git a/uv.lock b/uv.lock
index 89b02d4f..112adebd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4361,6 +4361,9 @@ duckdb = [
 dynamodb = [
     { name = "boto3" },
 ]
+entra-auth = [
+    { name = "azure-identity" },
+]
 gcp-auth = [
     { name = "google-auth" },
 ]
@@ -4459,6 +4462,7 @@ notebook = [
 [package.metadata]
 requires-dist = [
     { name = "adlfs", marker = "extra == 'adlfs'", specifier = ">=2024.7.0" },
+    { name = "azure-identity", marker = "extra == 'entra-auth'", specifier = 
">=1.25.1" },
     { name = "bodo", marker = "extra == 'bodo'", specifier = ">=2025.7.4" },
     { name = "boto3", marker = "extra == 'dynamodb'", specifier = ">=1.24.59" 
},
     { name = "boto3", marker = "extra == 'glue'", specifier = ">=1.24.59" },
@@ -4502,7 +4506,7 @@ requires-dist = [
     { name = "thrift-sasl", marker = "extra == 'hive-kerberos'", specifier = 
">=0.4.3" },
     { name = "zstandard", specifier = ">=0.13.0,<1.0.0" },
 ]
-provides-extras = ["pyarrow", "pandas", "duckdb", "ray", "bodo", "daft", 
"polars", "snappy", "hive", "hive-kerberos", "s3fs", "glue", "adlfs", 
"dynamodb", "bigquery", "sql-postgres", "sql-sqlite", "gcsfs", "rest-sigv4", 
"hf", "pyiceberg-core", "datafusion", "gcp-auth"]
+provides-extras = ["pyarrow", "pandas", "duckdb", "ray", "bodo", "daft", 
"polars", "snappy", "hive", "hive-kerberos", "s3fs", "glue", "adlfs", 
"dynamodb", "bigquery", "sql-postgres", "sql-sqlite", "gcsfs", "rest-sigv4", 
"hf", "pyiceberg-core", "datafusion", "gcp-auth", "entra-auth"]
 
 [package.metadata.requires-dev]
 dev = [

Reply via email to