This is an automated email from the ASF dual-hosted git repository.

Xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 2d5444b05 feat: HuggingFace Hub storage backend and CDC table properties (#2375)
2d5444b05 is described below

commit 2d5444b050bbdc1808786c6502fdabc35e49587c
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Thu May 21 10:02:11 2026 +0200

    feat: HuggingFace Hub storage backend and CDC table properties (#2375)
    
    ## Which issue does this PR close?
    
    - Closes #.
    
    ## What changes are included in this PR?
    
    Adds two opt-in capabilities for storing Iceberg tables on HuggingFace
    Hub with content-defined chunking for efficient deduplication.
    
    ### HuggingFace Hub storage
    
    New `opendal-hf` feature on `iceberg-storage-opendal` (off by default,
    included in `opendal-all`) that wires HuggingFace's OpenDAL service into
    `FileIO`. Paths use the form:
    
    `hf://<repo_type>/<owner>/<repo>[@<revision>]/<path_in_repo>`
    
    where `repo_type` must be one of `models`, `datasets`, `spaces`, or
    `buckets`. The prefix is mandatory. Configuration via `FileIOBuilder`
    properties:
    - `hf.token` — API token (required for private repos / writes)
    - `hf.endpoint` — Hub endpoint, defaults to https://huggingface.co
    - `hf.revision` — fallback revision when a path has no `@<revision>`
    
    `OpenDalResolvingStorage` recognises the `hf` scheme and lazily
    constructs a per-scheme storage instance. `delete_stream` groups paths
    by `<repo_type>/<repo_id>` so bucket and dataset paths to the same repo
    do not share an operator.
    
    ### CDC (content-defined chunking) table properties
    
    New table properties under `write.parquet.content-defined-chunking.*`
    (matching PyIceberg convention):
    - `write.parquet.content-defined-chunking.enabled` (bool, default false)
    - `write.parquet.content-defined-chunking.min-chunk-size` (bytes,
    default 256 KiB)
    - `write.parquet.content-defined-chunking.max-chunk-size` (bytes,
    default 1 MiB)
    - `write.parquet.content-defined-chunking.norm-level` (i32, default 0)
    
    CDC activates only when `enabled = "true"` is set explicitly. Defaults
    match parquet's own `CdcOptions` defaults. CDC options are applied in
    the DataFusion physical write plan.
    
    ## Are these changes tested?
    
    - Rust unit tests for `HfUri` parsing and CDC property parsing.
    - Rust integration tests in `file_io_hf_test.rs` guarded on
    `HF_OPENDAL_TOKEN`, `HF_OPENDAL_BUCKET`, `HF_OPENDAL_DATASET`; tests
    skip gracefully when env vars are unset.
    - Python tests in `test_huggingface_and_cdc.py` covering CDC property
    persistence, PyIceberg writes with CDC, DataFusion read-back, and HF
    credentials end-to-end (skipped without `HF_TOKEN` / `HF_DATASET`).
---
 .github/workflows/ci_hf_cdc.yml                    | 110 ++++++
 bindings/python/pyproject.toml                     |   3 +
 bindings/python/tests/test_huggingface_and_cdc.py  | 194 +++++++++++
 bindings/python/uv.lock                            | 223 ++++++++++++
 crates/iceberg/src/io/storage/config/hf.rs         | 104 ++++++
 crates/iceberg/src/io/storage/config/mod.rs        |   2 +
 crates/iceberg/src/spec/table_properties.rs        | 189 ++++++++++-
 .../datafusion/src/physical_plan/write.rs          |  13 +-
 crates/storage/opendal/Cargo.toml                  |   3 +-
 crates/storage/opendal/src/hf.rs                   | 348 +++++++++++++++++++
 crates/storage/opendal/src/lib.rs                  |  57 +++-
 crates/storage/opendal/src/resolving.rs            |  15 +-
 crates/storage/opendal/tests/file_io_hf_test.rs    | 376 +++++++++++++++++++++
 13 files changed, 1625 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/ci_hf_cdc.yml b/.github/workflows/ci_hf_cdc.yml
new file mode 100644
index 000000000..78cd50d0a
--- /dev/null
+++ b/.github/workflows/ci_hf_cdc.yml
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: HuggingFace and CDC Integration Tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    paths:
+      - 'crates/storage/opendal/**'
+      - 'crates/iceberg/src/io/**'
+      - 'crates/iceberg/src/spec/table_properties.rs'
+      - 'crates/integrations/datafusion/**'
+      - 'bindings/python/tests/test_huggingface_and_cdc.py'
+      - '.github/workflows/ci_hf_cdc.yml'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  hf-integration:
+    name: HuggingFace Hub integration tests
+    runs-on: ubuntu-latest
+    # Skip fork PRs (no HF secrets there); `secrets` is not usable in a job-level `if`.
+    if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }}
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - name: Setup Rust toolchain
+        uses: ./.github/actions/setup-builder
+
+      - name: Cache Rust artifacts
+        uses: swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2
+
+      - name: Install protoc
+        uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3.0.0
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Run Rust HF integration tests
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_BUCKET: ${{ secrets.HF_BUCKET }}
+          HF_DATASET: ${{ secrets.HF_DATASET }}
+        run: |
+          cargo test -p iceberg-storage-opendal \
+            --features opendal-hf \
+            --test file_io_hf_test \
+            -- --test-threads=1
+
+  cdc-python:
+    name: CDC and HuggingFace Python tests
+    runs-on: ubuntu-latest
+    # Always runs: `secrets` is not usable in a job-level `if`, the CDC tests need no
+    # credentials, and the HF tests skip themselves when HF_TOKEN/HF_DATASET are unset.
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: "3.12"
+
+      - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0
+        with:
+          working-directory: "bindings/python"
+          command: build
+          args: --out dist -i python3.12
+
+      - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
+        with:
+          version: "0.9.3"
+          enable-cache: true
+
+      - name: Install dependencies
+        working-directory: "bindings/python"
+        run: |
+          make install
+          uv pip install --no-build --reinstall --find-links dist/ pyiceberg-core
+
+      - name: Run CDC and HuggingFace Python tests
+        working-directory: "bindings/python"
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_DATASET: ${{ secrets.HF_DATASET }}
+        run: |
+          uv run --no-sync pytest tests/test_huggingface_and_cdc.py -v
diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml
index 0933bdc5e..f23944332 100644
--- a/bindings/python/pyproject.toml
+++ b/bindings/python/pyproject.toml
@@ -54,6 +54,8 @@ ignore = ["F403", "F405"]
 [tool.pytest.ini_options]
 filterwarnings = [
   "error",
+  # huggingface_hub uses hf_xet.upload_files() internally which is deprecated in hf_xet
+  "ignore::DeprecationWarning:huggingface_hub",
 ]
 
 [dependency-groups]
@@ -68,4 +70,5 @@ dev = [
     "pyiceberg[sql-sqlite]>=0.11",
     "pyarrow>=17",
     "fastavro>=1.11.1",
+    "huggingface_hub>=0.20",
 ]
diff --git a/bindings/python/tests/test_huggingface_and_cdc.py b/bindings/python/tests/test_huggingface_and_cdc.py
new file mode 100644
index 000000000..7e69ffe67
--- /dev/null
+++ b/bindings/python/tests/test_huggingface_and_cdc.py
@@ -0,0 +1,194 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Tests for HuggingFace Hub URI support and CDC (content-defined chunking) options.
+
+CDC options are standard Iceberg table properties and work in both Rust and PyIceberg
+automatically — no special API is required beyond setting string properties.
+
+HF credentials are passed as file_io_properties to IcebergDataFusionTable.
+Tests requiring live HF credentials are skipped when HF_TOKEN or HF_DATASET is not set.
+"""
+
+import os
+import pytest
+import pyarrow as pa
+import datafusion
+from datafusion import SessionContext
+from packaging.version import Version
+from pyiceberg.catalog import load_catalog
+from pyiceberg_core.datafusion import IcebergDataFusionTable
+
+requires_datafusion_53 = pytest.mark.skipif(
+    Version(datafusion.__version__) < Version("53.0.0"),
+    reason="IcebergDataFusionTable requires datafusion>=53 for FFI compatibility",
+)
+
+
+# ---------------------------------------------------------------------------
+# CDC tests — run without any external credentials
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="module")
+def local_catalog(tmp_path_factory: pytest.TempPathFactory):
+    warehouse = tmp_path_factory.mktemp("cdc_warehouse")
+    return load_catalog(
+        "default",
+        **{
+            "uri": f"sqlite:///{warehouse}/pyiceberg_catalog.db",
+            "warehouse": f"file://{warehouse}",
+        },
+    )
+
+
+@pytest.fixture(scope="module")
+def sample_table() -> pa.Table:
+    return pa.table(
+        {
+            "id": pa.array(list(range(1000)), type=pa.int32()),
+            "payload": pa.array(
+                [f"row-{i:06d}" for i in range(1000)], type=pa.large_utf8()
+            ),
+        }
+    )
+
+
+def test_cdc_table_properties_are_persisted(local_catalog, sample_table):
+    """Table properties with CDC options are stored and returned as-is."""
+    local_catalog.create_namespace_if_not_exists("cdc_ns")
+
+    # Use values that differ from parquet defaults (256 KiB min, 1 MiB max, 0 norm).
+    tbl = local_catalog.create_table_if_not_exists(
+        "cdc_ns.cdc_persist",
+        schema=sample_table.schema,
+        properties={
+            "write.parquet.content-defined-chunking.min-chunk-size": "65536",
+            "write.parquet.content-defined-chunking.max-chunk-size": "524288",
+            "write.parquet.content-defined-chunking.norm-level": "2",
+        },
+    )
+
+    props = tbl.properties
+    assert props.get("write.parquet.content-defined-chunking.min-chunk-size") == "65536"
+    assert (
+        props.get("write.parquet.content-defined-chunking.max-chunk-size") == "524288"
+    )
+    assert props.get("write.parquet.content-defined-chunking.norm-level") == "2"
+
+
+def test_cdc_write_via_pyiceberg(local_catalog, sample_table):
+    """PyIceberg tbl.append() writes parquet with CDC options when properties are set."""
+    local_catalog.create_namespace_if_not_exists("cdc_ns")
+
+    tbl = local_catalog.create_table_if_not_exists(
+        "cdc_ns.cdc_pyiceberg_write",
+        schema=sample_table.schema,
+        properties={"write.parquet.content-defined-chunking.enabled": "true"},
+    )
+    tbl.append(sample_table)
+
+    result = tbl.scan().to_arrow()
+    assert len(result) == len(sample_table)
+
+
+@requires_datafusion_53
+def test_cdc_write_and_read_via_datafusion(local_catalog, sample_table):
+    """A table with CDC properties can be written and read back via DataFusion."""
+    local_catalog.create_namespace_if_not_exists("cdc_ns")
+
+    tbl = local_catalog.create_table_if_not_exists(
+        "cdc_ns.cdc_write_read",
+        schema=sample_table.schema,
+        properties={"write.parquet.content-defined-chunking.enabled": "true"},
+    )
+    tbl.append(sample_table)
+
+    provider = IcebergDataFusionTable(
+        identifier=tbl.name(),
+        metadata_location=tbl.metadata_location,
+        file_io_properties=tbl.io.properties,
+    )
+
+    ctx = SessionContext()
+    ctx.register_table("cdc_table", provider)
+    assert ctx.table("cdc_table").count() == len(sample_table)
+
+
+# ---------------------------------------------------------------------------
+# HF + CDC tests — skipped when HF_TOKEN or HF_DATASET is not set
+# ---------------------------------------------------------------------------
+
+requires_hf = pytest.mark.skipif(
+    not os.environ.get("HF_TOKEN") or not os.environ.get("HF_DATASET"),
+    reason="HF_TOKEN or HF_DATASET not set",
+)
+
+
+@pytest.fixture(scope="module")
+def hf_cdc_table(sample_table):
+    """Write a CDC-enabled Iceberg table to HF Hub once; share across HF tests.
+
+    Uses FsspecFileIO backed by huggingface_hub's HfFileSystem (hf:// in fsspec).
+    HF_TOKEN is read from the environment automatically by HfFileSystem.
+    """
+    token = os.environ["HF_TOKEN"]
+    dataset = os.environ["HF_DATASET"]
+
+    warehouse = f"hf://datasets/{dataset}/iceberg-ci-{os.getpid()}"
+    catalog = load_catalog(
+        "hf_test",
+        **{
+            "uri": "sqlite:///:memory:",
+            "warehouse": warehouse,
+            "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO",
+        },
+    )
+    catalog.create_namespace("ns")
+    tbl = catalog.create_table(
+        "ns.cdc_tbl",
+        schema=sample_table.schema,
+        properties={"write.parquet.content-defined-chunking.enabled": "true"},
+    )
+    tbl.append(sample_table)
+    # HfFileSystem.dircache may reflect the pre-write state; invalidate it so
+    # subsequent reads (info/open) see the files just uploaded via xet.
+    tbl.io.get_fs("hf").invalidate_cache()
+    return tbl, token
+
+
+@requires_hf
+def test_hf_cdc_write_and_read_via_pyarrow(hf_cdc_table, sample_table):
+    """PyIceberg writes CDC parquet to HF Hub; PyArrow scan reads it back."""
+    tbl, _ = hf_cdc_table
+    result = tbl.scan().to_arrow()
+    assert len(result) == len(sample_table)
+
+
+@requires_hf
+@requires_datafusion_53
+def test_hf_cdc_write_and_read_via_datafusion(hf_cdc_table, sample_table):
+    """PyIceberg writes CDC parquet to HF Hub; IcebergDataFusionTable reads it back via opendal-hf."""
+    tbl, token = hf_cdc_table
+    provider = IcebergDataFusionTable(
+        identifier=tbl.name(),
+        metadata_location=tbl.metadata_location,
+        file_io_properties={"hf.token": token},
+    )
+    ctx = SessionContext()
+    ctx.register_table("hf_table", provider)
+    assert ctx.table("hf_table").count() == len(sample_table)
diff --git a/bindings/python/uv.lock b/bindings/python/uv.lock
index c346e3245..26a9f5df3 100644
--- a/bindings/python/uv.lock
+++ b/bindings/python/uv.lock
@@ -6,6 +6,15 @@ resolution-markers = [
     "python_full_version < '3.14'",
 ]
 
+[[package]]
+name = "annotated-doc"
+version = "0.0.4"
+source = { registry = "https://pypi.org/simple"; }
+sdist = { url = 
"https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz";,
 hash = 
"sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size 
= 7288, upload-time = "2025-11-10T22:07:42.062Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl";,
 hash = 
"sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size 
= 5303, upload-time = "2025-11-10T22:07:40.673Z" },
+]
+
 [[package]]
 name = "annotated-types"
 version = "0.7.0"
@@ -15,6 +24,20 @@ wheels = [
     { url = 
"https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl";,
 hash = 
"sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size 
= 13643, upload-time = "2024-05-20T21:33:24.1Z" },
 ]
 
+[[package]]
+name = "anyio"
+version = "4.13.0"
+source = { registry = "https://pypi.org/simple"; }
+dependencies = [
+    { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
+    { name = "idna" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = 
"https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz";,
 hash = 
"sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size 
= 231622, upload-time = "2026-03-24T12:59:09.671Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl";,
 hash = 
"sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size 
= 114353, upload-time = "2026-03-24T12:59:08.246Z" },
+]
+
 [[package]]
 name = "cachetools"
 version = "6.2.6"
@@ -240,6 +263,15 @@ wheels = [
     { url = 
"https://files.pythonhosted.org/packages/32/f1/f21bd5319113e89ceceed2df840df21e9c5150d181db74b6ba80400f9f48/fastavro-1.12.2-cp314-cp314t-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:afede7324822800e4f90e96b9514188a237a60f35e8e7a10b2129c10c78f6e4d", size 
= 3356664, upload-time = "2026-04-24T14:37:34.231Z" },
 ]
 
+[[package]]
+name = "filelock"
+version = "3.29.0"
+source = { registry = "https://pypi.org/simple"; }
+sdist = { url = 
"https://files.pythonhosted.org/packages/b5/fe/997687a931ab51049acce6fa1f23e8f01216374ea81374ddee763c493db5/filelock-3.29.0.tar.gz";,
 hash = 
"sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", size 
= 57571, upload-time = "2026-04-19T15:39:10.068Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl";,
 hash = 
"sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size 
= 39812, upload-time = "2026-04-19T15:39:08.752Z" },
+]
+
 [[package]]
 name = "fsspec"
 version = "2026.4.0"
@@ -303,6 +335,95 @@ wheels = [
     { url = 
"https://files.pythonhosted.org/packages/15/32/77ee8a6c1564fc345a491a4e85b3bf360e4cf26eac98c4532d2fdb96e01f/greenlet-3.5.0-cp314-cp314t-win_amd64.whl";,
 hash = 
"sha256:d60097128cb0a1cab9ea541186ea13cd7b847b8449a7787c2e2350da0cb82d86", size 
= 245324, upload-time = "2026-04-27T12:24:40.295Z" },
 ]
 
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple"; }
+sdist = { url = 
"https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz";,
 hash = 
"sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size 
= 101250, upload-time = "2025-04-24T03:35:25.427Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl";,
 hash = 
"sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size 
= 37515, upload-time = "2025-04-24T03:35:24.344Z" },
+]
+
+[[package]]
+name = "hf-xet"
+version = "1.5.0"
+source = { registry = "https://pypi.org/simple"; }
+sdist = { url = 
"https://files.pythonhosted.org/packages/74/d8/5c06fc76461418326a7decf8367480c35be11a41fd938633929c60a9ec6b/hf_xet-1.5.0.tar.gz";,
 hash = 
"sha256:e0fb0a34d9f406eed88233e829a67ec016bec5af19e480eac65a233ea289a948", size 
= 837196, upload-time = "2026-05-06T06:18:15.583Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/68/9b/6912c99070915a4f28119e3c5b52a9abd1eec0ad5cb293b8c967a0c6f5a2/hf_xet-1.5.0-cp313-cp313t-macosx_10_12_x86_64.whl";,
 hash = 
"sha256:7d70fe2ce97b9db73b9c9b9c81fe3693640aec83416a966c446afea54acfae3c", size 
= 4023383, upload-time = "2026-05-06T06:17:53.947Z" },
+    { url = 
"https://files.pythonhosted.org/packages/0f/6d/9563cfde59b5d8128a9c7ec972a087f4c782e4f7bac5a85234edfd5d5e49/hf_xet-1.5.0-cp313-cp313t-macosx_11_0_arm64.whl";,
 hash = 
"sha256:73a0dae8c71de3b0633a45c73f4a4a5ed09e94b43441d82981a781d4f12baa42", size 
= 3792751, upload-time = "2026-05-06T06:17:51.791Z" },
+    { url = 
"https://files.pythonhosted.org/packages/07/a5/ed5a0cf35b49a0571af5a8f53416dad1877a718c021c9937c3a53cb45781/hf_xet-1.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl";,
 hash = 
"sha256:a60290ec57e9b71767fba7c3645ddafdd0759974b540441510c629c6db6db24a", size 
= 4456058, upload-time = "2026-05-06T06:17:40.735Z" },
+    { url = 
"https://files.pythonhosted.org/packages/60/fb/3ae8bf2a7a37a4197d0195d7247fd25b3952e15cb8a599e285dfaa6f52b3/hf_xet-1.5.0-cp313-cp313t-manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:e5de0f6deada0dada870bb376a11bcd1f08abf3a968a6d118f33e72d1b1eb480", size 
= 4250783, upload-time = "2026-05-06T06:17:38.412Z" },
+    { url = 
"https://files.pythonhosted.org/packages/a2/9b/8bae40d4d91525085137196e84eb0ed49cf65b5e96e5c3ecdadd8bd0fac2/hf_xet-1.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl";,
 hash = 
"sha256:c799d49f1a5544a0ef7591c0ee75e0d6b93d6f56dc7a4979f59f7518d2872216", size 
= 4445594, upload-time = "2026-05-06T06:18:04.219Z" },
+    { url = 
"https://files.pythonhosted.org/packages/13/59/c74efbbd4e8728172b2cc72a2bc014d2947a4b7bdced932fbd3f5da1a4e5/hf_xet-1.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:2baea1b0b989e5c152fe81425f7745ddc8901280ba3d97c98d8cdece7b706c60", size 
= 4663995, upload-time = "2026-05-06T06:18:06.1Z" },
+    { url = 
"https://files.pythonhosted.org/packages/73/32/8e1e0410af64cda9b139d1dcebdc993a8ff9c8c7c0e2696ae356d75ccc0d/hf_xet-1.5.0-cp313-cp313t-win_amd64.whl";,
 hash = 
"sha256:526345b3ed45f374f6317349df489167606736c876241ba984105afe7fd4839d", size 
= 3966608, upload-time = "2026-05-06T06:18:19.74Z" },
+    { url = 
"https://files.pythonhosted.org/packages/fc/34/a8febc8f4edbea8b3e21b02ebc8b628679b84ba7e45cde624a7736b51500/hf_xet-1.5.0-cp313-cp313t-win_arm64.whl";,
 hash = 
"sha256:786d28e2eb8315d5035544b9d137b4a842d600c434bb91bf7d0d953cce906ad4", size 
= 3796946, upload-time = "2026-05-06T06:18:17.568Z" },
+    { url = 
"https://files.pythonhosted.org/packages/2a/20/8fc8996afe5815fa1a6be8e9e5c02f24500f409d599e905800d498a4e14d/hf_xet-1.5.0-cp314-cp314t-macosx_10_12_x86_64.whl";,
 hash = 
"sha256:872d5601e6deea30d15865ede55d29eac6daf5a534ab417b99b6ef6b076dd96c", size 
= 4023495, upload-time = "2026-05-06T06:18:01.94Z" },
+    { url = 
"https://files.pythonhosted.org/packages/32/6a/93d84463c00cecb561a7508aa6303e35ee2894294eac14245526924415fe/hf_xet-1.5.0-cp314-cp314t-macosx_11_0_arm64.whl";,
 hash = 
"sha256:9929561f5abf4581c8ea79587881dfef6b8abb2a0d8a51915936fc2a614f4e73", size 
= 3792731, upload-time = "2026-05-06T06:18:00.021Z" },
+    { url = 
"https://files.pythonhosted.org/packages/9d/5a/8ec8e0c863b382d00b3c2e2af6ded6b06371be617144a625903a6d562f4b/hf_xet-1.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl";,
 hash = 
"sha256:f7b7bbae318e583a86fb21e5a4a175d6721d628a2874f4bd022d0e660c32a682", size 
= 4456738, upload-time = "2026-05-06T06:17:49.574Z" },
+    { url = 
"https://files.pythonhosted.org/packages/c5/ca/f7effa1a67717da2bcc6b6c28f71c6ca648c77acaec4e2c32f40cbe16d85/hf_xet-1.5.0-cp314-cp314t-manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:cf7b2dc6f31a4ea754bb50f74cde482dcf5d366d184076d8530b9872787f3761", size 
= 4251622, upload-time = "2026-05-06T06:17:47.096Z" },
+    { url = 
"https://files.pythonhosted.org/packages/65/f2/19247dba3e231cf77dec59ddfb878f00057635ff773d099c9b59d37812c3/hf_xet-1.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl";,
 hash = 
"sha256:8dbcbab554c9ef158ef2c991545c3e970ddd8cc7acdcd0a78c5a41095dab4ded", size 
= 4445667, upload-time = "2026-05-06T06:18:11.983Z" },
+    { url = 
"https://files.pythonhosted.org/packages/7f/64/6f116801a3bcfb6f59f5c251f48cadc47ea54026441c4a385079286a94fa/hf_xet-1.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:5906bf7718d3636dc13402914736abe723492cb730f744834f5f5b67d3a12702", size 
= 4664619, upload-time = "2026-05-06T06:18:13.771Z" },
+    { url = 
"https://files.pythonhosted.org/packages/5c/e8/069542d37946ed08669b127e1496fa99e78196d71de8d41eda5e9f1b7a58/hf_xet-1.5.0-cp314-cp314t-win_amd64.whl";,
 hash = 
"sha256:5f3dc2248fc01cc0a00cd392ab497f1ca373fcbc7e3f2da1f452480b384e839e", size 
= 3966802, upload-time = "2026-05-06T06:18:28.162Z" },
+    { url = 
"https://files.pythonhosted.org/packages/f9/91/fc6fdec27b14d04e88c386ac0a0129732b53fa23f7c4a78f4b83a039c567/hf_xet-1.5.0-cp314-cp314t-win_arm64.whl";,
 hash = 
"sha256:b285cea1b5bab46b758772716ba8d6854a1a0310fed1c249d678a8b38601e5a0", size 
= 3797168, upload-time = "2026-05-06T06:18:26.287Z" },
+    { url = 
"https://files.pythonhosted.org/packages/3d/fb/69ff198a82cae7eb1a69fb84d93b3a3e4816564d76817fe541ddc96874eb/hf_xet-1.5.0-cp37-abi3-macosx_10_12_x86_64.whl";,
 hash = 
"sha256:dad0dc84e941b8ba3c860659fe1fdc35c049d47cce293f003287757e971a8f56", size 
= 4030814, upload-time = "2026-05-06T06:17:57.933Z" },
+    { url = 
"https://files.pythonhosted.org/packages/9b/ff/edcc2b40162bef3ff78e14ab637e5f3b89243d6aee72f5949d3bb6a5af83/hf_xet-1.5.0-cp37-abi3-macosx_11_0_arm64.whl";,
 hash = 
"sha256:fd6e5a9b0fdac4ed03ed45ef79254a655b1aaab514a02202617fbf643f5fdf7a", size 
= 3798444, upload-time = "2026-05-06T06:17:55.79Z" },
+    { url = 
"https://files.pythonhosted.org/packages/49/4d/103f76b04310e5e57656696cc184690d20c466af0bca3ca88f8c8ea5d4f3/hf_xet-1.5.0-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl";,
 hash = 
"sha256:3531b1823a0e6d77d80f9ed15ca0e00f0d115094f8ac033d5cae88f4564cc949", size 
= 4465986, upload-time = "2026-05-06T06:17:44.886Z" },
+    { url = 
"https://files.pythonhosted.org/packages/c4/a2/546f47f464737b3edbab6f8ddb57f2599b93d2cbb66f06abb475ccb48651/hf_xet-1.5.0-cp37-abi3-manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:9a0ee58cd18d5ea799f7ed11290bbccbe56bdd8b1d97ca74b9cc49a3945d7a3b", size 
= 4259865, upload-time = "2026-05-06T06:17:42.639Z" },
+    { url = 
"https://files.pythonhosted.org/packages/95/7f/1be593c1f28613be2e196473481cd81bfc5910795e30a34e8f744f6cac4f/hf_xet-1.5.0-cp37-abi3-musllinux_1_2_aarch64.whl";,
 hash = 
"sha256:1e60df5a42e9bed8628b6416af2cba4cba57ae9f02de226a06b020d98e1aab18", size 
= 4459835, upload-time = "2026-05-06T06:18:08.026Z" },
+    { url = 
"https://files.pythonhosted.org/packages/aa/b2/703569fc881f3284487e68cda7b42179978480da3c438042a6bbbb4a671c/hf_xet-1.5.0-cp37-abi3-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:4b35549ce62601b84da4ff9b24d970032ace3d4430f52d91bcbb26c901d6c690", size 
= 4672414, upload-time = "2026-05-06T06:18:09.864Z" },
+    { url = 
"https://files.pythonhosted.org/packages/af/37/1b6def445c567286b50aa3b33828158e135b1be44938dde59f11382a500c/hf_xet-1.5.0-cp37-abi3-win_amd64.whl";,
 hash = 
"sha256:2806c7c17b4d23f8d88f7c4814f838c3b6150773fe339c20af23e1cfaf2797e4", size 
= 3977238, upload-time = "2026-05-06T06:18:23.621Z" },
+    { url = 
"https://files.pythonhosted.org/packages/62/94/3b66b148778ee100dcfd69c2ca22b57b41b44d3063ceec934f209e9184ce/hf_xet-1.5.0-cp37-abi3-win_arm64.whl";,
 hash = 
"sha256:b6c9df403040248c76d808d3e047d64db2d923bae593eb244c41e425cf6cd7be", size 
= 3806916, upload-time = "2026-05-06T06:18:21.7Z" },
+]
+
+[[package]]
+name = "httpcore"
+version = "1.0.9"
+source = { registry = "https://pypi.org/simple"; }
+dependencies = [
+    { name = "certifi" },
+    { name = "h11" },
+]
+sdist = { url = 
"https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz";,
 hash = 
"sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size 
= 85484, upload-time = "2025-04-24T22:06:22.219Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl";,
 hash = 
"sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size 
= 78784, upload-time = "2025-04-24T22:06:20.566Z" },
+]
+
+[[package]]
+name = "httpx"
+version = "0.28.1"
+source = { registry = "https://pypi.org/simple"; }
+dependencies = [
+    { name = "anyio" },
+    { name = "certifi" },
+    { name = "httpcore" },
+    { name = "idna" },
+]
+sdist = { url = 
"https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz";,
 hash = 
"sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size 
= 141406, upload-time = "2024-12-06T15:37:23.222Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl";,
 hash = 
"sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size 
= 73517, upload-time = "2024-12-06T15:37:21.509Z" },
+]
+
+[[package]]
+name = "huggingface-hub"
+version = "1.15.0"
+source = { registry = "https://pypi.org/simple"; }
+dependencies = [
+    { name = "filelock" },
+    { name = "fsspec" },
+    { name = "hf-xet", marker = "platform_machine == 'AMD64' or 
platform_machine == 'aarch64' or platform_machine == 'amd64' or 
platform_machine == 'arm64' or platform_machine == 'x86_64'" },
+    { name = "httpx" },
+    { name = "packaging" },
+    { name = "pyyaml" },
+    { name = "tqdm" },
+    { name = "typer" },
+    { name = "typing-extensions" },
+]
+sdist = { url = 
"https://files.pythonhosted.org/packages/bb/b6/e22bd20a25299c34b8c5922c1545a6320825b13906eb0f7298edfd034a0b/huggingface_hub-1.15.0.tar.gz";,
 hash = 
"sha256:28abfdddda3927fd4de6a63cf26ab012498a2c24dae52baf150c5c6edf98a1d5", size 
= 784100, upload-time = "2026-05-15T11:42:52.149Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/6e/11/0b64cc9024329b76d7547c19a67604a61d21d3ba678a69d1b220c29d5112/huggingface_hub-1.15.0-py3-none-any.whl";,
 hash = 
"sha256:a4a59af04cbc41a3fe3fec429b171ef994ef8c971eda10136746f408dd4e3744", size 
= 663602, upload-time = "2026-05-15T11:42:50.487Z" },
+]
+
 [[package]]
 name = "idna"
 version = "3.15"
@@ -758,6 +879,7 @@ source = { editable = "." }
 dev = [
     { name = "datafusion" },
     { name = "fastavro" },
+    { name = "huggingface-hub" },
     { name = "maturin" },
     { name = "pyarrow" },
     { name = "pyiceberg", extra = ["sql-sqlite"] },
@@ -770,6 +892,7 @@ dev = [
 dev = [
     { name = "datafusion", specifier = "==52.*" },
     { name = "fastavro", specifier = ">=1.11.1" },
+    { name = "huggingface-hub", specifier = ">=0.20" },
     { name = "maturin", specifier = ">=1.0,<2.0" },
     { name = "pyarrow", specifier = ">=17" },
     { name = "pyiceberg", extras = ["sql-sqlite"], specifier = ">=0.11" },
@@ -895,6 +1018,70 @@ wheels = [
     { url = 
"https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl";,
 hash = 
"sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size 
= 229892, upload-time = "2024-03-01T18:36:18.57Z" },
 ]
 
+[[package]]
+name = "pyyaml"
+version = "6.0.3"
+source = { registry = "https://pypi.org/simple"; }
+sdist = { url = 
"https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz";,
 hash = 
"sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size 
= 130960, upload-time = "2025-09-25T21:33:16.546Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/f4/a0/39350dd17dd6d6c6507025c0e53aef67a9293a6d37d3511f23ea510d5800/pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl";,
 hash = 
"sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b", size 
= 184227, upload-time = "2025-09-25T21:31:46.04Z" },
+    { url = 
"https://files.pythonhosted.org/packages/05/14/52d505b5c59ce73244f59c7a50ecf47093ce4765f116cdb98286a71eeca2/pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl";,
 hash = 
"sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956", size 
= 174019, upload-time = "2025-09-25T21:31:47.706Z" },
+    { url = 
"https://files.pythonhosted.org/packages/43/f7/0e6a5ae5599c838c696adb4e6330a59f463265bfa1e116cfd1fbb0abaaae/pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8", size 
= 740646, upload-time = "2025-09-25T21:31:49.21Z" },
+    { url = 
"https://files.pythonhosted.org/packages/2f/3a/61b9db1d28f00f8fd0ae760459a5c4bf1b941baf714e207b6eb0657d2578/pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl";,
 hash = 
"sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198", size 
= 840793, upload-time = "2025-09-25T21:31:50.735Z" },
+    { url = 
"https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b", size 
= 770293, upload-time = "2025-09-25T21:31:51.828Z" },
+    { url = 
"https://files.pythonhosted.org/packages/8b/ef/abd085f06853af0cd59fa5f913d61a8eab65d7639ff2a658d18a25d6a89d/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl";,
 hash = 
"sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0", size 
= 732872, upload-time = "2025-09-25T21:31:53.282Z" },
+    { url = 
"https://files.pythonhosted.org/packages/1f/15/2bc9c8faf6450a8b3c9fc5448ed869c599c0a74ba2669772b1f3a0040180/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69", size 
= 758828, upload-time = "2025-09-25T21:31:54.807Z" },
+    { url = 
"https://files.pythonhosted.org/packages/a3/00/531e92e88c00f4333ce359e50c19b8d1de9fe8d581b1534e35ccfbc5f393/pyyaml-6.0.3-cp310-cp310-win32.whl";,
 hash = 
"sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e", size 
= 142415, upload-time = "2025-09-25T21:31:55.885Z" },
+    { url = 
"https://files.pythonhosted.org/packages/2a/fa/926c003379b19fca39dd4634818b00dec6c62d87faf628d1394e137354d4/pyyaml-6.0.3-cp310-cp310-win_amd64.whl";,
 hash = 
"sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c", size 
= 158561, upload-time = "2025-09-25T21:31:57.406Z" },
+    { url = 
"https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl";,
 hash = 
"sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size 
= 185826, upload-time = "2025-09-25T21:31:58.655Z" },
+    { url = 
"https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl";,
 hash = 
"sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size 
= 175577, upload-time = "2025-09-25T21:32:00.088Z" },
+    { url = 
"https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size 
= 775556, upload-time = "2025-09-25T21:32:01.31Z" },
+    { url = 
"https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl";,
 hash = 
"sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size 
= 882114, upload-time = "2025-09-25T21:32:03.376Z" },
+    { url = 
"https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size 
= 806638, upload-time = "2025-09-25T21:32:04.553Z" },
+    { url = 
"https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl";,
 hash = 
"sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size 
= 767463, upload-time = "2025-09-25T21:32:06.152Z" },
+    { url = 
"https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size 
= 794986, upload-time = "2025-09-25T21:32:07.367Z" },
+    { url = 
"https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl";,
 hash = 
"sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size 
= 142543, upload-time = "2025-09-25T21:32:08.95Z" },
+    { url = 
"https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl";,
 hash = 
"sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size 
= 158763, upload-time = "2025-09-25T21:32:09.96Z" },
+    { url = 
"https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl";,
 hash = 
"sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size 
= 182063, upload-time = "2025-09-25T21:32:11.445Z" },
+    { url = 
"https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl";,
 hash = 
"sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size 
= 173973, upload-time = "2025-09-25T21:32:12.492Z" },
+    { url = 
"https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size 
= 775116, upload-time = "2025-09-25T21:32:13.652Z" },
+    { url = 
"https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl";,
 hash = 
"sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size 
= 844011, upload-time = "2025-09-25T21:32:15.21Z" },
+    { url = 
"https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size 
= 807870, upload-time = "2025-09-25T21:32:16.431Z" },
+    { url = 
"https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl";,
 hash = 
"sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size 
= 761089, upload-time = "2025-09-25T21:32:17.56Z" },
+    { url = 
"https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size 
= 790181, upload-time = "2025-09-25T21:32:18.834Z" },
+    { url = 
"https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl";,
 hash = 
"sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size 
= 137658, upload-time = "2025-09-25T21:32:20.209Z" },
+    { url = 
"https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl";,
 hash = 
"sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size 
= 154003, upload-time = "2025-09-25T21:32:21.167Z" },
+    { url = 
"https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl";,
 hash = 
"sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size 
= 140344, upload-time = "2025-09-25T21:32:22.617Z" },
+    { url = 
"https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl";,
 hash = 
"sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size 
= 181669, upload-time = "2025-09-25T21:32:23.673Z" },
+    { url = 
"https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl";,
 hash = 
"sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size 
= 173252, upload-time = "2025-09-25T21:32:25.149Z" },
+    { url = 
"https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size 
= 767081, upload-time = "2025-09-25T21:32:26.575Z" },
+    { url = 
"https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl";,
 hash = 
"sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size 
= 841159, upload-time = "2025-09-25T21:32:27.727Z" },
+    { url = 
"https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size 
= 801626, upload-time = "2025-09-25T21:32:28.878Z" },
+    { url = 
"https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl";,
 hash = 
"sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size 
= 753613, upload-time = "2025-09-25T21:32:30.178Z" },
+    { url = 
"https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size 
= 794115, upload-time = "2025-09-25T21:32:31.353Z" },
+    { url = 
"https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl";,
 hash = 
"sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size 
= 137427, upload-time = "2025-09-25T21:32:32.58Z" },
+    { url = 
"https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl";,
 hash = 
"sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size 
= 154090, upload-time = "2025-09-25T21:32:33.659Z" },
+    { url = 
"https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl";,
 hash = 
"sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size 
= 140246, upload-time = "2025-09-25T21:32:34.663Z" },
+    { url = 
"https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl";,
 hash = 
"sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size 
= 181814, upload-time = "2025-09-25T21:32:35.712Z" },
+    { url = 
"https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl";,
 hash = 
"sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size 
= 173809, upload-time = "2025-09-25T21:32:36.789Z" },
+    { url = 
"https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size 
= 766454, upload-time = "2025-09-25T21:32:37.966Z" },
+    { url = 
"https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl";,
 hash = 
"sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size 
= 836355, upload-time = "2025-09-25T21:32:39.178Z" },
+    { url = 
"https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size 
= 794175, upload-time = "2025-09-25T21:32:40.865Z" },
+    { url = 
"https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl";,
 hash = 
"sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size 
= 755228, upload-time = "2025-09-25T21:32:42.084Z" },
+    { url = 
"https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size 
= 789194, upload-time = "2025-09-25T21:32:43.362Z" },
+    { url = 
"https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl";,
 hash = 
"sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size 
= 156429, upload-time = "2025-09-25T21:32:57.844Z" },
+    { url = 
"https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl";,
 hash = 
"sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size 
= 143912, upload-time = "2025-09-25T21:32:59.247Z" },
+    { url = 
"https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl";,
 hash = 
"sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size 
= 189108, upload-time = "2025-09-25T21:32:44.377Z" },
+    { url = 
"https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl";,
 hash = 
"sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size 
= 183641, upload-time = "2025-09-25T21:32:45.407Z" },
+    { url = 
"https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size 
= 831901, upload-time = "2025-09-25T21:32:48.83Z" },
+    { url = 
"https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl";,
 hash = 
"sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size 
= 861132, upload-time = "2025-09-25T21:32:50.149Z" },
+    { url = 
"https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size 
= 839261, upload-time = "2025-09-25T21:32:51.808Z" },
+    { url = 
"https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl";,
 hash = 
"sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size 
= 805272, upload-time = "2025-09-25T21:32:52.941Z" },
+    { url = 
"https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl";,
 hash = 
"sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size 
= 829923, upload-time = "2025-09-25T21:32:54.537Z" },
+    { url = 
"https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl";,
 hash = 
"sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size 
= 174062, upload-time = "2025-09-25T21:32:55.767Z" },
+    { url = 
"https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl";,
 hash = 
"sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size 
= 149341, upload-time = "2025-09-25T21:32:56.828Z" },
+]
+
 [[package]]
 name = "requests"
 version = "2.33.1"
@@ -923,6 +1110,15 @@ wheels = [
     { url = 
"https://files.pythonhosted.org/packages/b3/76/6d163cfac87b632216f71879e6b2cf17163f773ff59c00b5ff4900a80fa3/rich-14.3.4-py3-none-any.whl";,
 hash = 
"sha256:07e7adb4690f68864777b1450859253bed81a99a31ac321ac1817b2313558952", size 
= 310480, upload-time = "2026-04-11T02:57:47.484Z" },
 ]
 
+[[package]]
+name = "shellingham"
+version = "1.5.4"
+source = { registry = "https://pypi.org/simple"; }
+sdist = { url = 
"https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz";,
 hash = 
"sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size 
= 10310, upload-time = "2023-10-24T04:13:40.426Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl";,
 hash = 
"sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size 
= 9755, upload-time = "2023-10-24T04:13:38.866Z" },
+]
+
 [[package]]
 name = "six"
 version = "1.17.0"
@@ -1067,6 +1263,33 @@ wheels = [
     { url = 
"https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl";,
 hash = 
"sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe", size 
= 14583, upload-time = "2026-03-25T20:22:03.012Z" },
 ]
 
+[[package]]
+name = "tqdm"
+version = "4.67.3"
+source = { registry = "https://pypi.org/simple"; }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = 
"https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz";,
 hash = 
"sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size 
= 169598, upload-time = "2026-02-03T17:35:53.048Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl";,
 hash = 
"sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size 
= 78374, upload-time = "2026-02-03T17:35:50.982Z" },
+]
+
+[[package]]
+name = "typer"
+version = "0.25.1"
+source = { registry = "https://pypi.org/simple"; }
+dependencies = [
+    { name = "annotated-doc" },
+    { name = "click" },
+    { name = "rich" },
+    { name = "shellingham" },
+]
+sdist = { url = 
"https://files.pythonhosted.org/packages/e4/51/9aed62104cea109b820bbd6c14245af756112017d309da813ef107d42e7e/typer-0.25.1.tar.gz";,
 hash = 
"sha256:9616eb8853a09ffeabab1698952f33c6f29ffdbceb4eaeecf571880e8d7664cc", size 
= 122276, upload-time = "2026-04-30T19:32:16.964Z" }
+wheels = [
+    { url = 
"https://files.pythonhosted.org/packages/3f/f9/2b3ff4e56e5fa7debfaf9eb135d0da96f3e9a1d5b27222223c7296336e5f/typer-0.25.1-py3-none-any.whl";,
 hash = 
"sha256:75caa44ed46a03fb2dab8808753ffacdbfea88495e74c85a28c5eefcf5f39c89", size 
= 58409, upload-time = "2026-04-30T19:32:18.271Z" },
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.15.0"
diff --git a/crates/iceberg/src/io/storage/config/hf.rs 
b/crates/iceberg/src/io/storage/config/hf.rs
new file mode 100644
index 000000000..fdb79a5af
--- /dev/null
+++ b/crates/iceberg/src/io/storage/config/hf.rs
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! HuggingFace Hub storage configuration.
+
+use serde::{Deserialize, Serialize};
+use typed_builder::TypedBuilder;
+
+use super::StorageConfig;
+use crate::Result;
+
+/// HuggingFace Hub authentication token.
+pub const HF_TOKEN: &str = "hf.token";
+/// HuggingFace Hub endpoint URL. Defaults to `https://huggingface.co`.
+pub const HF_ENDPOINT: &str = "hf.endpoint";
+/// Default git revision/branch for all paths that don't specify one. Defaults to `main`.
+pub const HF_REVISION: &str = "hf.revision";
+
+/// HuggingFace Hub storage configuration.
+///
+/// Repo type, repo ID, and revision are normally encoded in the file path URI
+/// (`hf://<repo_type>/<owner>/<repo>[@<revision>]/<path>`, where `<repo_type>`
+/// is one of `models`, `datasets`, `spaces`, or `buckets`).
+/// The fields here provide credentials and a default revision fallback.
+#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, 
TypedBuilder)]
+pub struct HfConfig {
+    /// HuggingFace Hub API token (required for private repos and write 
access).
+    #[builder(default, setter(strip_option, into))]
+    pub token: Option<String>,
+    /// HuggingFace Hub endpoint. Defaults to `https://huggingface.co`.
+    #[builder(default, setter(strip_option, into))]
+    pub endpoint: Option<String>,
+    /// Default revision to use when a path URI does not specify one.
+    #[builder(default, setter(strip_option, into))]
+    pub revision: Option<String>,
+}
+
+impl TryFrom<&StorageConfig> for HfConfig {
+    type Error = crate::Error;
+
+    /// Builds an [`HfConfig`] from the `hf.*` entries of a [`StorageConfig`].
+    ///
+    /// Every field whose key is present is cloned out of the properties; a
+    /// missing key leaves the field as `None`. This conversion always returns `Ok`.
+    fn try_from(config: &StorageConfig) -> Result<Self> {
+        let props = config.props();
+        // One lookup shape for all three keys: copy the value if it is set.
+        let lookup = |key: &str| props.get(key).cloned();
+        Ok(HfConfig {
+            token: lookup(HF_TOKEN),
+            endpoint: lookup(HF_ENDPOINT),
+            revision: lookup(HF_REVISION),
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_hf_config_builder() {
+        let cfg = HfConfig::builder()
+            .token("hf_mytoken")
+            .endpoint("https://huggingface.co")
+            .revision("dev")
+            .build();
+        assert_eq!(cfg.token.as_deref(), Some("hf_mytoken"));
+        assert_eq!(cfg.endpoint.as_deref(), Some("https://huggingface.co"));
+        assert_eq!(cfg.revision.as_deref(), Some("dev"));
+    }
+
+    #[test]
+    fn test_hf_config_from_storage_config() {
+        let storage_config = StorageConfig::new()
+            .with_prop(HF_TOKEN, "hf_abc123")
+            .with_prop(HF_ENDPOINT, "https://huggingface.co");
+
+        let cfg = HfConfig::try_from(&storage_config).unwrap();
+        assert_eq!(cfg.token.as_deref(), Some("hf_abc123"));
+        assert_eq!(cfg.endpoint.as_deref(), Some("https://huggingface.co"));
+    }
+
+    #[test]
+    fn test_hf_config_empty() {
+        let cfg = HfConfig::try_from(&StorageConfig::new()).unwrap();
+        assert_eq!(cfg.token, None);
+        assert_eq!(cfg.endpoint, None);
+    }
+}
diff --git a/crates/iceberg/src/io/storage/config/mod.rs 
b/crates/iceberg/src/io/storage/config/mod.rs
index cbdb53730..2350aab6d 100644
--- a/crates/iceberg/src/io/storage/config/mod.rs
+++ b/crates/iceberg/src/io/storage/config/mod.rs
@@ -32,6 +32,7 @@
 
 mod azdls;
 mod gcs;
+mod hf;
 mod oss;
 mod s3;
 
@@ -39,6 +40,7 @@ use std::collections::HashMap;
 
 pub use azdls::*;
 pub use gcs::*;
+pub use hf::*;
 pub use oss::*;
 pub use s3::*;
 use serde::{Deserialize, Serialize};
diff --git a/crates/iceberg/src/spec/table_properties.rs 
b/crates/iceberg/src/spec/table_properties.rs
index a3d4e7fda..dc21da565 100644
--- a/crates/iceberg/src/spec/table_properties.rs
+++ b/crates/iceberg/src/spec/table_properties.rs
@@ -22,8 +22,6 @@ use std::str::FromStr;
 use crate::compression::CompressionCodec;
 use crate::error::{Error, ErrorKind, Result};
 
-// Helper function to parse a property from a HashMap
-// If the property is not found, use the default value
 fn parse_property<T: FromStr>(
     properties: &HashMap<String, String>,
     key: &str,
@@ -121,6 +119,15 @@ pub struct TableProperties {
     /// Whether garbage collection is enabled on drop.
     /// When `false`, data files will not be deleted when a table is dropped.
     pub gc_enabled: bool,
+    /// Whether content-defined chunking is enabled.
+    /// `true` only when `write.parquet.content-defined-chunking.enabled = "true"`.
+    pub cdc_enabled: bool,
+    /// Content-defined chunking minimum chunk size in bytes.
+    pub cdc_min_chunk_size: usize,
+    /// Content-defined chunking maximum chunk size in bytes.
+    pub cdc_max_chunk_size: usize,
+    /// Content-defined chunking normalization level (gearhash bit adjustment).
+    pub cdc_norm_level: i32,
 }
 
 impl TableProperties {
@@ -226,6 +233,26 @@ impl TableProperties {
     pub const PROPERTY_GC_ENABLED: &str = "gc.enabled";
     /// Default value for gc.enabled
     pub const PROPERTY_GC_ENABLED_DEFAULT: bool = true;
+
+    /// Enable content-defined chunking with parquet defaults (or per-property overrides).
+    pub const PROPERTY_PARQUET_CDC_ENABLED: &str = 
"write.parquet.content-defined-chunking.enabled";
+    /// Default value for content-defined chunking enabled.
+    pub const PROPERTY_PARQUET_CDC_ENABLED_DEFAULT: bool = false;
+    /// Minimum chunk size in bytes for content-defined chunking.
+    pub const PROPERTY_PARQUET_CDC_MIN_CHUNK_SIZE: &str =
+        "write.parquet.content-defined-chunking.min-chunk-size";
+    /// Default matches `parquet::file::properties::DEFAULT_CDC_MIN_CHUNK_SIZE`.
+    pub const PROPERTY_PARQUET_CDC_MIN_CHUNK_SIZE_DEFAULT: usize = 256 * 1024;
+    /// Maximum chunk size in bytes for content-defined chunking.
+    pub const PROPERTY_PARQUET_CDC_MAX_CHUNK_SIZE: &str =
+        "write.parquet.content-defined-chunking.max-chunk-size";
+    /// Default matches `parquet::file::properties::DEFAULT_CDC_MAX_CHUNK_SIZE`.
+    pub const PROPERTY_PARQUET_CDC_MAX_CHUNK_SIZE_DEFAULT: usize = 1024 * 1024;
+    /// Normalization level (gearhash bit adjustment) for content-defined chunking.
+    pub const PROPERTY_PARQUET_CDC_NORM_LEVEL: &str =
+        "write.parquet.content-defined-chunking.norm-level";
+    /// Default matches `parquet::file::properties::DEFAULT_CDC_NORM_LEVEL`.
+    pub const PROPERTY_PARQUET_CDC_NORM_LEVEL_DEFAULT: i32 = 0;
 }
 
 impl TryFrom<&HashMap<String, String>> for TableProperties {
@@ -275,6 +302,26 @@ impl TryFrom<&HashMap<String, String>> for TableProperties 
{
                 TableProperties::PROPERTY_GC_ENABLED,
                 TableProperties::PROPERTY_GC_ENABLED_DEFAULT,
             )?,
+            cdc_enabled: parse_property(
+                props,
+                TableProperties::PROPERTY_PARQUET_CDC_ENABLED,
+                TableProperties::PROPERTY_PARQUET_CDC_ENABLED_DEFAULT,
+            )?,
+            cdc_min_chunk_size: parse_property(
+                props,
+                TableProperties::PROPERTY_PARQUET_CDC_MIN_CHUNK_SIZE,
+                TableProperties::PROPERTY_PARQUET_CDC_MIN_CHUNK_SIZE_DEFAULT,
+            )?,
+            cdc_max_chunk_size: parse_property(
+                props,
+                TableProperties::PROPERTY_PARQUET_CDC_MAX_CHUNK_SIZE,
+                TableProperties::PROPERTY_PARQUET_CDC_MAX_CHUNK_SIZE_DEFAULT,
+            )?,
+            cdc_norm_level: parse_property(
+                props,
+                TableProperties::PROPERTY_PARQUET_CDC_NORM_LEVEL,
+                TableProperties::PROPERTY_PARQUET_CDC_NORM_LEVEL_DEFAULT,
+            )?,
         })
     }
 }
@@ -583,4 +630,142 @@ mod tests {
             );
         }
     }
+
+    #[test]
+    fn test_cdc_disabled_by_default() {
+        let props = HashMap::new();
+        let tp = TableProperties::try_from(&props).unwrap();
+        assert!(!tp.cdc_enabled);
+    }
+
+    #[test]
+    fn test_cdc_enabled_via_flag() {
+        let props = HashMap::from([(
+            TableProperties::PROPERTY_PARQUET_CDC_ENABLED.to_string(),
+            "true".to_string(),
+        )]);
+        let tp = TableProperties::try_from(&props).unwrap();
+        assert!(tp.cdc_enabled);
+        assert_eq!(tp.cdc_min_chunk_size, 256 * 1024);
+        assert_eq!(tp.cdc_max_chunk_size, 1024 * 1024);
+        assert_eq!(tp.cdc_norm_level, 0);
+    }
+
+    #[test]
+    fn test_cdc_size_props_alone_do_not_enable() {
+        let props = HashMap::from([(
+            TableProperties::PROPERTY_PARQUET_CDC_MIN_CHUNK_SIZE.to_string(),
+            "262144".to_string(),
+        )]);
+        let tp = TableProperties::try_from(&props).unwrap();
+        assert!(!tp.cdc_enabled);
+    }
+
+    #[test]
+    fn test_cdc_custom_values() {
+        let props = HashMap::from([
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_ENABLED.to_string(),
+                "true".to_string(),
+            ),
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_MIN_CHUNK_SIZE.to_string(),
+                "200000".to_string(),
+            ),
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_MAX_CHUNK_SIZE.to_string(),
+                "900000".to_string(),
+            ),
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_NORM_LEVEL.to_string(),
+                "1".to_string(),
+            ),
+        ]);
+        let tp = TableProperties::try_from(&props).unwrap();
+        assert!(tp.cdc_enabled);
+        assert_eq!(tp.cdc_min_chunk_size, 200000);
+        assert_eq!(tp.cdc_max_chunk_size, 900000);
+        assert_eq!(tp.cdc_norm_level, 1);
+    }
+
+    #[test]
+    fn test_cdc_partial_override() {
+        let props = HashMap::from([
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_ENABLED.to_string(),
+                "true".to_string(),
+            ),
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_NORM_LEVEL.to_string(),
+                "2".to_string(),
+            ),
+        ]);
+        let tp = TableProperties::try_from(&props).unwrap();
+        assert!(tp.cdc_enabled);
+        assert_eq!(tp.cdc_min_chunk_size, 256 * 1024);
+        assert_eq!(tp.cdc_max_chunk_size, 1024 * 1024);
+        assert_eq!(tp.cdc_norm_level, 2);
+    }
+
+    #[test]
+    fn test_cdc_negative_norm_level() {
+        let props = HashMap::from([
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_ENABLED.to_string(),
+                "true".to_string(),
+            ),
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_NORM_LEVEL.to_string(),
+                "-2".to_string(),
+            ),
+        ]);
+        let tp = TableProperties::try_from(&props).unwrap();
+        assert_eq!(tp.cdc_norm_level, -2);
+    }
+
+    #[test]
+    fn test_cdc_invalid_min_chunk_size() {
+        let props = HashMap::from([
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_ENABLED.to_string(),
+                "true".to_string(),
+            ),
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_MIN_CHUNK_SIZE.to_string(),
+                "not_a_number".to_string(),
+            ),
+        ]);
+        let err = TableProperties::try_from(&props).unwrap_err();
+        assert!(
+            err.to_string().contains(
+                "Invalid value for write.parquet.content-defined-chunking.min-chunk-size"
+            )
+        );
+    }
+
+    #[test]
+    fn test_cdc_invalid_norm_level() {
+        let props = HashMap::from([
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_ENABLED.to_string(),
+                "true".to_string(),
+            ),
+            (
+                TableProperties::PROPERTY_PARQUET_CDC_NORM_LEVEL.to_string(),
+                "not_a_number".to_string(),
+            ),
+        ]);
+        let err = TableProperties::try_from(&props).unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Invalid value for write.parquet.content-defined-chunking.norm-level")
+        );
+    }
+
+    #[test]
+    fn test_cdc_no_properties() {
+        let props = HashMap::from([("some.other.property".to_string(), "value".to_string())]);
+        let tp = TableProperties::try_from(&props).unwrap();
+        assert!(!tp.cdc_enabled);
+    }
 }
diff --git a/crates/integrations/datafusion/src/physical_plan/write.rs b/crates/integrations/datafusion/src/physical_plan/write.rs
index 3b227e20f..282d1005b 100644
--- a/crates/integrations/datafusion/src/physical_plan/write.rs
+++ b/crates/integrations/datafusion/src/physical_plan/write.rs
@@ -45,7 +45,7 @@ use iceberg::writer::file_writer::location_generator::{
 };
 use iceberg::writer::file_writer::rolling_writer::RollingFileWriterBuilder;
 use iceberg::{Error, ErrorKind};
-use parquet::file::properties::WriterProperties;
+use parquet::file::properties::{CdcOptions, WriterPropertiesBuilder};
 use uuid::Uuid;
 
 use crate::physical_plan::DATA_FILES_COL_NAME;
@@ -226,8 +226,17 @@ impl ExecutionPlan for IcebergWriteExec {
         }
 
         // Create data file writer builder
+        let cdc_options = table_props.cdc_enabled.then_some(CdcOptions {
+            min_chunk_size: table_props.cdc_min_chunk_size,
+            max_chunk_size: table_props.cdc_max_chunk_size,
+            norm_level: table_props.cdc_norm_level,
+        });
+        let writer_properties = WriterPropertiesBuilder::default()
+            .set_content_defined_chunking(cdc_options)
+            .build();
+
         let parquet_file_writer_builder = ParquetWriterBuilder::new_with_match_mode(
-            WriterProperties::default(),
+            writer_properties,
             self.table.metadata().current_schema().clone(),
             FieldMatchMode::Name,
         );
diff --git a/crates/storage/opendal/Cargo.toml b/crates/storage/opendal/Cargo.toml
index 55aa6ac75..549959b53 100644
--- a/crates/storage/opendal/Cargo.toml
+++ b/crates/storage/opendal/Cargo.toml
@@ -28,11 +28,12 @@ keywords = ["iceberg", "opendal", "storage"]
 
 [features]
 default = ["opendal-memory", "opendal-fs", "opendal-s3"]
-opendal-all = ["opendal-memory", "opendal-fs", "opendal-s3", "opendal-gcs", "opendal-oss", "opendal-azdls"]
+opendal-all = ["opendal-memory", "opendal-fs", "opendal-s3", "opendal-gcs", "opendal-oss", "opendal-azdls", "opendal-hf"]
 
 opendal-azdls = ["opendal/services-azdls"]
 opendal-fs = ["opendal/services-fs"]
 opendal-gcs = ["opendal/services-gcs"]
+opendal-hf = ["opendal/services-hf"]
 opendal-memory = ["opendal/services-memory"]
 opendal-oss = ["opendal/services-oss"]
 opendal-s3 = ["opendal/services-s3", "reqsign-aws-v4", "reqsign-core"]
diff --git a/crates/storage/opendal/src/hf.rs b/crates/storage/opendal/src/hf.rs
new file mode 100644
index 000000000..a7ca2d884
--- /dev/null
+++ b/crates/storage/opendal/src/hf.rs
@@ -0,0 +1,348 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! HuggingFace Hub storage backend.
+
+use std::collections::HashMap;
+
+use iceberg::io::{HF_ENDPOINT, HF_REVISION, HF_TOKEN};
+use iceberg::{Error, ErrorKind, Result};
+use opendal::{Configurator, Operator, OperatorUri};
+
+use crate::utils::from_opendal_error;
+
+// ---------------------------------------------------------------------------
+// Minimal URI parser — extracts only what the caller needs.
+// TODO: remove once opendal-service-hf exports its URI parser publicly.
+// ---------------------------------------------------------------------------
+
+/// Repository type of a HuggingFace Hub repository.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) enum HfRepoType {
+    /// Model repository (`models/` prefix).
+    Model,
+    /// Dataset repository (`datasets/` prefix).
+    Dataset,
+    /// Spaces application repository (`spaces/` prefix).
+    Space,
+    /// XET-backed object-storage bucket (`buckets/` prefix).
+    Bucket,
+}
+
+impl HfRepoType {
+    /// Parse a repo-type keyword (singular or plural) into the corresponding variant.
+    fn parse(s: &str) -> Option<Self> {
+        match s.to_lowercase().replace(' ', "").as_str() {
+            "model" | "models" => Some(Self::Model),
+            "dataset" | "datasets" => Some(Self::Dataset),
+            "space" | "spaces" => Some(Self::Space),
+            "bucket" | "buckets" => Some(Self::Bucket),
+            _ => None,
+        }
+    }
+
+    fn canonical(self) -> &'static str {
+        match self {
+            Self::Model => "models",
+            Self::Dataset => "datasets",
+            Self::Space => "spaces",
+            Self::Bucket => "buckets",
+        }
+    }
+}
+
+/// Parsed HuggingFace URI: `hf://<repo_type>/<repo_id>[@<revision>][/<path>]`.
+///
+/// `repo_type` must be explicitly specified — there is no implicit default.
+/// Only the fields required by this crate are stored; revision is consumed
+/// during parsing but not retained.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct HfUri {
+    pub repo_type: HfRepoType,
+    /// e.g. `"user/my-repo"`.
+    pub repo_id: String,
+    /// Path within the repository, e.g. `"train/data.parquet"`. Empty at repo root.
+    pub path: String,
+}
+
+impl HfUri {
+    /// Parse a full `hf://…` URI or the bare path portion (without scheme).
+    /// Returns `None` if the URI does not begin with a recognized repo-type prefix
+    /// (`models/`, `datasets/`, `spaces/`, or `buckets/`).
+    pub(crate) fn parse(full_uri: &str) -> Option<Self> {
+        let s = full_uri.strip_prefix("hf://").unwrap_or(full_uri);
+        if s.is_empty() {
+            return None;
+        }
+
+        // Require an explicit repo_type prefix — no implicit default.
+        let (first, rest) = s.split_once('/')?;
+        let repo_type = HfRepoType::parse(first)?;
+        let s = rest;
+
+        // Remaining: `<repo_id>[@<revision>][/<path_in_repo>]`
+        let (repo_id, path) = if s.contains('/') {
+            // Check if `@` appears in the first two slash-segments (the repo_id portion).
+            // This distinguishes "user/repo@rev/file" from "user/repo/path/@file".
+            let first_two = s.splitn(3, '/').take(2).collect::<Vec<_>>().join("/");
+            if first_two.contains('@') {
+                let (repo_id, rev_and_path) = s.split_once('@').unwrap();
+                let rev_and_path = rev_and_path.replace("%2F", "/");
+                (repo_id.to_string(), path_after_revision(&rev_and_path))
+            } else {
+                let segs: Vec<_> = s.splitn(3, '/').collect();
+                let repo_id = format!("{}/{}", segs[0], segs[1]);
+                let path = segs.get(2).copied().unwrap_or("").to_string();
+                (repo_id, path)
+            }
+        } else if let Some((repo_id, _)) = s.split_once('@') {
+            (repo_id.to_string(), String::new())
+        } else {
+            (s.to_string(), String::new())
+        };
+
+        Some(Self {
+            repo_type,
+            repo_id,
+            path,
+        })
+    }
+}
+
+/// Given the string after `@`, extract the path-in-repo, correctly skipping
+/// multi-segment special refs (`refs/convert/parquet`, `refs/pr/N`).
+/// These are the only two multi-segment special ref prefixes in HF's git model.
+fn path_after_revision(rev_and_path: &str) -> String {
+    if !rev_and_path.contains('/') {
+        return String::new();
+    }
+    if let Some(rest) = rev_and_path.strip_prefix("refs/convert/") {
+        return rest
+            .find('/')
+            .map_or(String::new(), |i| rest[i + 1..].to_string());
+    }
+    if let Some(rest) = rev_and_path.strip_prefix("refs/pr/") {
+        return rest
+            .find('/')
+            .map_or(String::new(), |i| rest[i + 1..].to_string());
+    }
+    rev_and_path
+        .split_once('/')
+        .map(|(_, path)| path.to_string())
+        .unwrap_or_default()
+}
+
+// ---------------------------------------------------------------------------
+// Public helpers used by lib.rs
+// ---------------------------------------------------------------------------
+
+/// Parse iceberg `StorageConfig` properties into an opendal [`opendal::services::HfConfig`].
+pub(crate) fn hf_config_parse(m: HashMap<String, String>) -> Result<opendal::services::HfConfig> {
+    let mut cfg = opendal::services::HfConfig::default();
+    if let Some(token) = m.get(HF_TOKEN) {
+        cfg.token = Some(token.clone());
+    }
+    if let Some(endpoint) = m.get(HF_ENDPOINT) {
+        cfg.endpoint = Some(endpoint.clone());
+    }
+    if let Some(revision) = m.get(HF_REVISION) {
+        cfg.revision = Some(revision.clone());
+    }
+    Ok(cfg)
+}
+
+/// Build an [`Operator`] for the given `hf://…` path and return it together with
+/// the relative path-in-repo.
+///
+/// URI parsing is delegated to opendal's [`HfConfig::from_uri`]. The base config
+/// provides fallback values for `revision` and `endpoint`; the `token` is always
+/// taken from the base config and never from the URI.
+pub(crate) fn hf_config_build<'a>(
+    cfg: &opendal::services::HfConfig,
+    path: &'a str,
+) -> Result<(Operator, &'a str)> {
+    let uri = OperatorUri::new(path, Vec::<(String, String)>::new()).map_err(|e| {
+        Error::new(ErrorKind::DataInvalid, format!("Invalid hf url: {path}")).with_source(e)
+    })?;
+
+    let mut hf_cfg = opendal::services::HfConfig::from_uri(&uri).map_err(|e| {
+        Error::new(ErrorKind::DataInvalid, format!("Invalid hf url: {path}")).with_source(e)
+    })?;
+
+    // Token must come from config only, never from the URI.
+    hf_cfg.token = cfg.token.clone();
+
+    if hf_cfg.endpoint.is_none() {
+        hf_cfg.endpoint = cfg.endpoint.clone();
+    }
+    if hf_cfg.revision.is_none() {
+        hf_cfg.revision = cfg.revision.clone();
+    }
+
+    let parsed = HfUri::parse(path)
+        .ok_or_else(|| Error::new(ErrorKind::DataInvalid, format!("Invalid hf url: {path}")))?;
+    let relative_path = &path[path.len() - parsed.path.len()..];
+
+    let op = Operator::from_config(hf_cfg)
+        .map_err(from_opendal_error)?
+        .finish();
+    Ok((op, relative_path))
+}
+
+/// Returns a stable cache key for `delete_stream` batching: `"<repo_type>/<repo_id>"`
+/// (e.g. `"buckets/user/my-repo"`), without revision.
+/// Repo type is included so bucket and dataset paths to the same repo use separate operators.
+/// Falls back to the full path so that unparsable paths never share an operator accidentally.
+pub(crate) fn hf_batch_key(path: &str) -> String {
+    HfUri::parse(path)
+        .map(|u| format!("{}/{}", u.repo_type.canonical(), u.repo_id))
+        .unwrap_or_else(|| path.to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn parse(uri: &str) -> HfUri {
+        HfUri::parse(uri).unwrap_or_else(|| panic!("parse failed for {uri:?}"))
+    }
+
+    #[test]
+    fn test_model_prefix() {
+        let u = parse("hf://models/user/my-model/path/to/file.parquet");
+        assert_eq!(u.repo_type, HfRepoType::Model);
+        assert_eq!(u.repo_id, "user/my-model");
+        assert_eq!(u.path, "path/to/file.parquet");
+    }
+
+    #[test]
+    fn test_dataset_prefix() {
+        let u = parse("hf://datasets/user/my-dataset/train/data.parquet");
+        assert_eq!(u.repo_type, HfRepoType::Dataset);
+        assert_eq!(u.repo_id, "user/my-dataset");
+        assert_eq!(u.path, "train/data.parquet");
+    }
+
+    #[test]
+    fn test_bucket_prefix() {
+        let u = parse("hf://buckets/myorg/my-bucket/iceberg/metadata/v1.json");
+        assert_eq!(u.repo_type, HfRepoType::Bucket);
+        assert_eq!(u.repo_id, "myorg/my-bucket");
+        assert_eq!(u.path, "iceberg/metadata/v1.json");
+    }
+
+    #[test]
+    fn test_revision() {
+        let u = parse("hf://datasets/user/my-dataset@main/train/data.parquet");
+        assert_eq!(u.repo_type, HfRepoType::Dataset);
+        assert_eq!(u.repo_id, "user/my-dataset");
+        assert_eq!(u.path, "train/data.parquet");
+    }
+
+    #[test]
+    fn test_refs_convert_revision() {
+        let u = parse("hf://datasets/squad@refs/convert/parquet/data.parquet");
+        assert_eq!(u.path, "data.parquet");
+    }
+
+    #[test]
+    fn test_refs_pr_revision() {
+        let u = parse("hf://models/user/repo@refs/pr/10/file.txt");
+        assert_eq!(u.path, "file.txt");
+    }
+
+    #[test]
+    fn test_encoded_revision() {
+        let u = parse("hf://models/user/repo@refs%2Fpr%2F10/file.txt");
+        assert_eq!(u.path, "file.txt");
+    }
+
+    #[test]
+    fn test_no_path() {
+        let u = parse("hf://models/user/my-model");
+        assert_eq!(u.repo_id, "user/my-model");
+        assert_eq!(u.path, "");
+    }
+
+    #[test]
+    fn test_at_in_path_not_revision() {
+        let u = parse("hf://models/user/repo/path/@not-a-revision.txt");
+        assert_eq!(u.path, "path/@not-a-revision.txt");
+    }
+
+    #[test]
+    fn test_single_segment_repo_id() {
+        // Without revision and path: unambiguous.
+        let u = parse("hf://models/gpt2");
+        assert_eq!(u.repo_type, HfRepoType::Model);
+        assert_eq!(u.repo_id, "gpt2");
+        assert_eq!(u.path, "");
+
+        // With explicit revision: single-segment repos with paths are parsed correctly.
+        let u = parse("hf://models/gpt2@main/config.json");
+        assert_eq!(u.repo_type, HfRepoType::Model);
+        assert_eq!(u.repo_id, "gpt2");
+        assert_eq!(u.path, "config.json");
+    }
+
+    #[test]
+    fn test_batch_key() {
+        assert_eq!(
+            hf_batch_key("hf://datasets/user/repo@main/path/file.parquet"),
+            "datasets/user/repo"
+        );
+        assert_eq!(
+            hf_batch_key("hf://buckets/org/bucket/data/file.parquet"),
+            "buckets/org/bucket"
+        );
+        // Same repo_id, different repo_type → different keys.
+        assert_ne!(
+            hf_batch_key("hf://buckets/user/repo/file"),
+            hf_batch_key("hf://datasets/user/repo/file"),
+        );
+    }
+
+    #[test]
+    fn test_invalid_uri() {
+        assert!(HfUri::parse("hf://").is_none());
+        // bare repo-type, no repo_id
+        assert!(HfUri::parse("hf://datasets").is_none());
+        // missing repo-type prefix
+        assert!(HfUri::parse("hf://user/my-model").is_none());
+        assert!(HfUri::parse("hf://gpt2").is_none());
+        // unrecognized repo-type prefix
+        assert!(HfUri::parse("hf://repos/user/repo/file").is_none());
+    }
+
+    #[test]
+    fn test_hf_config_build_relative_path() {
+        let cfg = opendal::services::HfConfig::default();
+
+        let (_, rel) = hf_config_build(
+            &cfg,
+            "hf://datasets/user/my-dataset@main/train/data.parquet",
+        )
+        .unwrap();
+        assert_eq!(rel, "train/data.parquet");
+
+        let (_, rel) = hf_config_build(&cfg, "hf://models/user/my-model/config.json").unwrap();
+        assert_eq!(rel, "config.json");
+
+        let (_, rel) = hf_config_build(&cfg, "hf://models/user/my-model").unwrap();
+        assert_eq!(rel, "");
+    }
+}
diff --git a/crates/storage/opendal/src/lib.rs b/crates/storage/opendal/src/lib.rs
index 65deaa5f4..67113833f 100644
--- a/crates/storage/opendal/src/lib.rs
+++ b/crates/storage/opendal/src/lib.rs
@@ -51,6 +51,14 @@ cfg_if! {
     }
 }
 
+cfg_if! {
+    if #[cfg(feature = "opendal-hf")] {
+        mod hf;
+        use hf::*;
+        use opendal::services::HfConfig;
+    }
+}
+
 cfg_if! {
     if #[cfg(feature = "opendal-fs")] {
         mod fs;
@@ -120,6 +128,9 @@ pub enum OpenDalStorageFactory {
     /// Azure Data Lake Storage factory.
     #[cfg(feature = "opendal-azdls")]
     Azdls,
+    /// HuggingFace Hub storage factory.
+    #[cfg(feature = "opendal-hf")]
+    Hf,
 }
 
 #[typetag::serde(name = "OpenDalStorageFactory")]
@@ -152,6 +163,10 @@ impl StorageFactory for OpenDalStorageFactory {
             OpenDalStorageFactory::Azdls => Ok(Arc::new(OpenDalStorage::Azdls {
                 config: azdls_config_parse(config.props().clone())?.into(),
             })),
+            #[cfg(feature = "opendal-hf")]
+            OpenDalStorageFactory::Hf => Ok(Arc::new(OpenDalStorage::Hf {
+                config: hf_config_parse(config.props().clone())?.into(),
+            })),
             #[cfg(all(
                 not(feature = "opendal-memory"),
                 not(feature = "opendal-fs"),
@@ -159,6 +174,7 @@ impl StorageFactory for OpenDalStorageFactory {
                 not(feature = "opendal-gcs"),
                 not(feature = "opendal-oss"),
                 not(feature = "opendal-azdls"),
+                not(feature = "opendal-hf"),
             ))]
             _ => Err(Error::new(
                 ErrorKind::FeatureUnsupported,
@@ -218,6 +234,16 @@ pub enum OpenDalStorage {
         /// Azure DLS configuration.
         config: Arc<AzdlsConfig>,
     },
+    /// HuggingFace Hub storage variant.
+    ///
+    /// Accepts paths of the form
+    /// `hf://<repo_type>/<owner>/<repo>[@<revision>]/<path_in_repo>`,
+    /// where `<repo_type>` must be one of `models`, `datasets`, `spaces`, or `buckets`.
+    #[cfg(feature = "opendal-hf")]
+    Hf {
+        /// HuggingFace Hub configuration (token + endpoint).
+        config: Arc<HfConfig>,
+    },
 }
 
 impl OpenDalStorage {
@@ -311,12 +337,15 @@ impl OpenDalStorage {
             }
             #[cfg(feature = "opendal-azdls")]
             OpenDalStorage::Azdls { config } => azdls_create_operator(path, config)?,
+            #[cfg(feature = "opendal-hf")]
+            OpenDalStorage::Hf { config } => hf_config_build(config, path)?,
             #[cfg(all(
                 not(feature = "opendal-s3"),
                 not(feature = "opendal-fs"),
                 not(feature = "opendal-gcs"),
                 not(feature = "opendal-oss"),
                 not(feature = "opendal-azdls"),
+                not(feature = "opendal-hf"),
             ))]
             _ => {
                 return Err(Error::new(
@@ -332,6 +361,21 @@ impl OpenDalStorage {
         Ok((operator, relative_path))
     }
 
+    /// Returns a cache key used by `delete_stream` to group paths by storage operator.
+    ///
+    /// For most backends the URL host (bucket name) is sufficient. For HF the host
+    /// encodes the repo type, not the repo identity, so a more specific key is used.
+    fn batch_key_for_path(&self, path: &str) -> String {
+        match self {
+            #[cfg(feature = "opendal-hf")]
+            OpenDalStorage::Hf { .. } => hf_batch_key(path),
+            _ => url::Url::parse(path)
+                .ok()
+                .and_then(|u| u.host_str().map(|s| s.to_string()))
+                .unwrap_or_default(),
+        }
+    }
+
     /// Extracts the relative path from an absolute path without building an operator.
     ///
     /// This is a lightweight alternative to [`create_operator`](Self::create_operator) for cases
@@ -408,12 +452,20 @@ impl OpenDalStorage {
                 let relative_path_len = azure_path.path.len();
                 Ok(&path[path.len() - relative_path_len..])
             }
+            #[cfg(feature = "opendal-hf")]
+            OpenDalStorage::Hf { .. } => {
+                let parsed = hf::HfUri::parse(path).ok_or_else(|| {
+                    Error::new(ErrorKind::DataInvalid, format!("Invalid hf url: {path}"))
+                })?;
+                Ok(&path[path.len() - parsed.path.len()..])
+            }
             #[cfg(all(
                 not(feature = "opendal-s3"),
                 not(feature = "opendal-fs"),
                 not(feature = "opendal-gcs"),
                 not(feature = "opendal-oss"),
                 not(feature = "opendal-azdls"),
+                not(feature = "opendal-hf"),
             ))]
             _ => Err(Error::new(
                 ErrorKind::FeatureUnsupported,
@@ -493,10 +545,7 @@ impl Storage for OpenDalStorage {
         let mut deleters: HashMap<String, opendal::Deleter> = HashMap::new();
 
         while let Some(path) = paths.next().await {
-            let bucket = url::Url::parse(&path)
-                .ok()
-                .and_then(|u| u.host_str().map(|s| s.to_string()))
-                .unwrap_or_default();
+            let bucket = self.batch_key_for_path(&path);
 
             let (relative_path, deleter) = match deleters.entry(bucket) {
                 Entry::Occupied(entry) => {
diff --git a/crates/storage/opendal/src/resolving.rs b/crates/storage/opendal/src/resolving.rs
index 621495519..86993220a 100644
--- a/crates/storage/opendal/src/resolving.rs
+++ b/crates/storage/opendal/src/resolving.rs
@@ -50,6 +50,7 @@ pub const SCHEME_ABFSS: &str = "abfss";
 pub const SCHEME_ABFS: &str = "abfs";
 pub const SCHEME_WASBS: &str = "wasbs";
 pub const SCHEME_WASB: &str = "wasb";
+pub const SCHEME_HF: &str = "hf";
 
 /// Parse a URL scheme string.
 fn parse_scheme(scheme: &str) -> Result<&'static str> {
@@ -60,6 +61,7 @@ fn parse_scheme(scheme: &str) -> Result<&'static str> {
         SCHEME_GS | SCHEME_GCS => Ok("gcs"),
         SCHEME_OSS => Ok("oss"),
         SCHEME_ABFSS | SCHEME_ABFS | SCHEME_WASBS | SCHEME_WASB => Ok("azdls"),
+        SCHEME_HF => Ok("hf"),
         s => Err(Error::new(
             ErrorKind::FeatureUnsupported,
             format!("Unsupported storage scheme: {s}"),
@@ -118,6 +120,13 @@ fn build_storage_for_scheme(
         "file" => Ok(OpenDalStorage::LocalFs),
         #[cfg(feature = "opendal-memory")]
         "memory" => Ok(OpenDalStorage::Memory(crate::memory::memory_config_build()?)),
+        #[cfg(feature = "opendal-hf")]
+        "hf" => {
+            let config = crate::hf::hf_config_parse(props.clone())?;
+            Ok(OpenDalStorage::Hf {
+                config: Arc::new(config),
+            })
+        }
         unsupported => Err(Error::new(
             ErrorKind::FeatureUnsupported,
             format!("Unsupported storage scheme: {unsupported}"),
@@ -196,7 +205,7 @@ impl StorageFactory for OpenDalResolvingStorageFactory {
 pub struct OpenDalResolvingStorage {
     /// Configuration properties shared across all backends.
     props: HashMap<String, String>,
-    /// Cache of scheme to storage mappings.
+    /// Cache of canonical scheme to storage mappings.
     #[serde(skip, default)]
     storages: RwLock<HashMap<&'static str, Arc<OpenDalStorage>>>,
     /// Custom AWS credential loader for S3 storage.
@@ -206,7 +215,7 @@ pub struct OpenDalResolvingStorage {
 }
 
 impl OpenDalResolvingStorage {
-    /// Resolve the storage for the given path by extracting the scheme and
+    /// Resolve the storage for the given path by extracting the canonical scheme and
     /// returning the cached or newly-created [`OpenDalStorage`].
     fn resolve(&self, path: &str) -> Result<Arc<OpenDalStorage>> {
         let scheme = extract_scheme(path)?;
@@ -281,7 +290,7 @@ impl Storage for OpenDalResolvingStorage {
     }
 
     async fn delete_stream(&self, mut paths: BoxStream<'static, String>) -> Result<()> {
-        // Group paths by scheme so each resolved storage receives a batch,
+        // Group paths by canonical scheme so each resolved storage receives a batch,
         // avoiding repeated operator creation per path.
         let mut grouped: HashMap<&'static str, Vec<String>> = HashMap::new();
         while let Some(path) = paths.next().await {
diff --git a/crates/storage/opendal/tests/file_io_hf_test.rs b/crates/storage/opendal/tests/file_io_hf_test.rs
new file mode 100644
index 000000000..3c773887f
--- /dev/null
+++ b/crates/storage/opendal/tests/file_io_hf_test.rs
@@ -0,0 +1,376 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Integration tests for FileIO HuggingFace Hub.
+//!
+//! These tests require a real HuggingFace token; each test is skipped when
+//! `HF_TOKEN` or the repo variable it needs is not set in the environment.
+//!
+//! The following environment variables are used:
+//! - `HF_TOKEN` — HuggingFace API token (required)
+//! - `HF_BUCKET` — `owner/repo` for a bucket-type repo (required when running bucket tests)
+//! - `HF_DATASET` — `owner/repo` for a dataset-type repo (required when running dataset tests)
+
+#[cfg(feature = "opendal-hf")]
+mod tests {
+    use std::sync::Arc;
+
+    use bytes::Bytes;
+    use futures::StreamExt;
+    use iceberg::io::{FileIO, FileIOBuilder, HF_REVISION, HF_TOKEN};
+    use iceberg_storage_opendal::{OpenDalResolvingStorageFactory, 
OpenDalStorageFactory};
+    use iceberg_test_utils::{normalize_test_name_with_parts, set_up};
+
+    const ENV_HF_TOKEN: &str = "HF_TOKEN";
+    const ENV_HF_BUCKET: &str = "HF_BUCKET";
+    const ENV_HF_DATASET: &str = "HF_DATASET";
+
+    macro_rules! require_env {
+        ($var:expr) => {
+            match std::env::var($var) {
+                Ok(v) => v,
+                Err(_) => {
+                    eprintln!("Skipping HF test: {} not set", $var);
+                    return;
+                }
+            }
+        };
+    }
+
+    fn get_file_io(token: &str) -> FileIO {
+        set_up();
+        FileIOBuilder::new(Arc::new(OpenDalStorageFactory::Hf))
+            .with_props(vec![(HF_TOKEN, token.to_string())])
+            .build()
+    }
+
+    fn get_resolving_file_io(token: &str) -> FileIO {
+        set_up();
+        FileIOBuilder::new(Arc::new(OpenDalResolvingStorageFactory::new()))
+            .with_props(vec![(HF_TOKEN, token.to_string())])
+            .build()
+    }
+
+    // --- bucket tests ---
+
+    #[tokio::test]
+    async fn test_hf_bucket_write_read_delete() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let bucket = require_env!(ENV_HF_BUCKET);
+        let file_io = get_file_io(&token);
+        let path = format!(
+            "hf://buckets/{}/{}",
+            bucket,
+            normalize_test_name_with_parts!("test_hf_bucket_write_read_delete")
+        );
+
+        let _ = file_io.delete(&path).await;
+        assert!(!file_io.exists(&path).await.unwrap());
+
+        file_io
+            .new_output(&path)
+            .unwrap()
+            .write(Bytes::from_static(b"iceberg-hf-bucket"))
+            .await
+            .unwrap();
+        assert!(file_io.exists(&path).await.unwrap());
+
+        let data = file_io.new_input(&path).unwrap().read().await.unwrap();
+        assert_eq!(data, Bytes::from_static(b"iceberg-hf-bucket"));
+
+        file_io.delete(&path).await.unwrap();
+        assert!(!file_io.exists(&path).await.unwrap());
+    }
+
+    #[tokio::test]
+    async fn test_hf_bucket_overwrite() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let bucket = require_env!(ENV_HF_BUCKET);
+        let file_io = get_file_io(&token);
+        let path = format!(
+            "hf://buckets/{}/{}",
+            bucket,
+            normalize_test_name_with_parts!("test_hf_bucket_overwrite")
+        );
+
+        let _ = file_io.delete(&path).await;
+
+        file_io
+            .new_output(&path)
+            .unwrap()
+            .write(Bytes::from_static(b"first"))
+            .await
+            .unwrap();
+        file_io
+            .new_output(&path)
+            .unwrap()
+            .write(Bytes::from_static(b"second"))
+            .await
+            .unwrap();
+
+        let data = file_io.new_input(&path).unwrap().read().await.unwrap();
+        assert_eq!(data, Bytes::from_static(b"second"));
+
+        file_io.delete(&path).await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_hf_bucket_range_read() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let bucket = require_env!(ENV_HF_BUCKET);
+        let file_io = get_file_io(&token);
+        let path = format!(
+            "hf://buckets/{}/{}",
+            bucket,
+            normalize_test_name_with_parts!("test_hf_bucket_range_read")
+        );
+
+        let _ = file_io.delete(&path).await;
+        file_io
+            .new_output(&path)
+            .unwrap()
+            .write(Bytes::from_static(b"hello world"))
+            .await
+            .unwrap();
+
+        let reader = file_io.new_input(&path).unwrap().reader().await.unwrap();
+        let chunk = reader.read(6..11).await.unwrap();
+        assert_eq!(chunk, Bytes::from_static(b"world"));
+
+        file_io.delete(&path).await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_hf_bucket_metadata() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let bucket = require_env!(ENV_HF_BUCKET);
+        let file_io = get_file_io(&token);
+        let path = format!(
+            "hf://buckets/{}/{}",
+            bucket,
+            normalize_test_name_with_parts!("test_hf_bucket_metadata")
+        );
+
+        let _ = file_io.delete(&path).await;
+        file_io
+            .new_output(&path)
+            .unwrap()
+            .write(Bytes::from_static(b"metadata-test"))
+            .await
+            .unwrap();
+
+        let meta = file_io.new_input(&path).unwrap().metadata().await.unwrap();
+        assert_eq!(meta.size, b"metadata-test".len() as u64);
+
+        file_io.delete(&path).await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_hf_bucket_delete_stream() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let bucket = require_env!(ENV_HF_BUCKET);
+        let file_io = get_file_io(&token);
+
+        let paths: Vec<String> = (0..3)
+            .map(|i| {
+                format!(
+                    "hf://buckets/{}/{}/file-{i}",
+                    bucket,
+                    
normalize_test_name_with_parts!("test_hf_bucket_delete_stream")
+                )
+            })
+            .collect();
+
+        for path in &paths {
+            let _ = file_io.delete(path).await;
+            file_io
+                .new_output(path)
+                .unwrap()
+                .write(Bytes::from_static(b"x"))
+                .await
+                .unwrap();
+            assert!(file_io.exists(path).await.unwrap());
+        }
+
+        let stream = futures::stream::iter(paths.clone()).boxed();
+        file_io.delete_stream(stream).await.unwrap();
+
+        for path in &paths {
+            assert!(!file_io.exists(path).await.unwrap());
+        }
+    }
+
+    #[tokio::test]
+    async fn test_hf_bucket_delete_stream_empty() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let file_io = get_file_io(&token);
+        file_io
+            .delete_stream(futures::stream::empty().boxed())
+            .await
+            .unwrap();
+    }
+
+    // --- dataset tests ---
+
+    #[tokio::test]
+    async fn test_hf_dataset_write_read_delete() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let dataset = require_env!(ENV_HF_DATASET);
+        let file_io = get_file_io(&token);
+        let path = format!(
+            "hf://datasets/{}/{}",
+            dataset,
+            
normalize_test_name_with_parts!("test_hf_dataset_write_read_delete")
+        );
+
+        let _ = file_io.delete(&path).await;
+        assert!(!file_io.exists(&path).await.unwrap());
+
+        file_io
+            .new_output(&path)
+            .unwrap()
+            .write(Bytes::from_static(b"iceberg-hf-dataset"))
+            .await
+            .unwrap();
+        assert!(file_io.exists(&path).await.unwrap());
+
+        let data = file_io.new_input(&path).unwrap().read().await.unwrap();
+        assert_eq!(data, Bytes::from_static(b"iceberg-hf-dataset"));
+
+        file_io.delete(&path).await.unwrap();
+        assert!(!file_io.exists(&path).await.unwrap());
+    }
+
+    // --- revision tests ---
+
+    #[tokio::test]
+    async fn test_hf_explicit_revision_in_uri() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let file_io = get_file_io(&token);
+        let name = 
normalize_test_name_with_parts!("test_hf_explicit_revision_in_uri");
+
+        let bucket = require_env!(ENV_HF_BUCKET);
+        // Write without revision, read back with explicit @main.
+        let write_path = format!("hf://buckets/{}/{}", bucket, name);
+        let read_path = format!("hf://buckets/{}@main/{}", bucket, name);
+
+        let _ = file_io.delete(&write_path).await;
+        file_io
+            .new_output(&write_path)
+            .unwrap()
+            .write(Bytes::from_static(b"revision-test"))
+            .await
+            .unwrap();
+
+        let data = 
file_io.new_input(&read_path).unwrap().read().await.unwrap();
+        assert_eq!(data, Bytes::from_static(b"revision-test"));
+
+        file_io.delete(&write_path).await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_hf_revision_from_config() {
+        let token = require_env!(ENV_HF_TOKEN);
+        set_up();
+
+        // Build FileIO with HF_REVISION set in config — paths without @revision use it.
+        let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::Hf))
+            .with_props(vec![
+                (HF_TOKEN, token.to_string()),
+                (HF_REVISION, "main".to_string()),
+            ])
+            .build();
+
+        let bucket = require_env!(ENV_HF_BUCKET);
+        let path = format!(
+            "hf://buckets/{}/{}",
+            bucket,
+            normalize_test_name_with_parts!("test_hf_revision_from_config")
+        );
+
+        let _ = file_io.delete(&path).await;
+        file_io
+            .new_output(&path)
+            .unwrap()
+            .write(Bytes::from_static(b"config-revision"))
+            .await
+            .unwrap();
+
+        let data = file_io.new_input(&path).unwrap().read().await.unwrap();
+        assert_eq!(data, Bytes::from_static(b"config-revision"));
+
+        file_io.delete(&path).await.unwrap();
+    }
+
+    // --- resolving storage tests ---
+
+    #[tokio::test]
+    async fn test_hf_resolving_storage() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let file_io = get_resolving_file_io(&token);
+
+        let bucket = require_env!(ENV_HF_BUCKET);
+        let path = format!(
+            "hf://buckets/{}/{}",
+            bucket,
+            normalize_test_name_with_parts!("test_hf_resolving_storage")
+        );
+
+        let _ = file_io.delete(&path).await;
+
+        file_io
+            .new_output(&path)
+            .unwrap()
+            .write(Bytes::from_static(b"resolving"))
+            .await
+            .unwrap();
+
+        let data = file_io.new_input(&path).unwrap().read().await.unwrap();
+        assert_eq!(data, Bytes::from_static(b"resolving"));
+
+        file_io.delete(&path).await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_hf_resolving_delete_stream_across_repo_types() {
+        let token = require_env!(ENV_HF_TOKEN);
+        let file_io = get_resolving_file_io(&token);
+
+        let bucket = require_env!(ENV_HF_BUCKET);
+        let dataset = require_env!(ENV_HF_DATASET);
+        let name = 
normalize_test_name_with_parts!("test_hf_resolving_delete_stream_across");
+        let bucket_path = format!("hf://buckets/{}/{}", bucket, name);
+        let dataset_path = format!("hf://datasets/{}/{}", dataset, name);
+
+        for path in [&bucket_path, &dataset_path] {
+            let _ = file_io.delete(path).await;
+            file_io
+                .new_output(path)
+                .unwrap()
+                .write(Bytes::from_static(b"x"))
+                .await
+                .unwrap();
+            assert!(file_io.exists(path).await.unwrap());
+        }
+
+        let stream = futures::stream::iter(vec![bucket_path.clone(), 
dataset_path.clone()]).boxed();
+        file_io.delete_stream(stream).await.unwrap();
+
+        assert!(!file_io.exists(&bucket_path).await.unwrap());
+        assert!(!file_io.exists(&dataset_path).await.unwrap());
+    }
+}

Reply via email to