This is an automated email from the ASF dual-hosted git repository. lzljs3620320 pushed a commit to branch release-1.3 in repository https://gitbox.apache.org/repos/asf/paimon.git
commit 0a657b324e5796abb26c96365482be6489f52373 Author: umi <[email protected]> AuthorDate: Sun Oct 26 13:46:15 2025 +0800 [doc] Supplementary the document of python REST API (#6466) --- docs/content/concepts/rest/dlf.md | 10 +++- docs/content/program-api/python-api.md | 94 +++++++++++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/docs/content/concepts/rest/dlf.md b/docs/content/concepts/rest/dlf.md index 9510fa2431..d093ad07df 100644 --- a/docs/content/concepts/rest/dlf.md +++ b/docs/content/concepts/rest/dlf.md @@ -3,8 +3,9 @@ title: "DLF Token" weight: 3 type: docs aliases: -- /concepts/rest/dlf.html + - /concepts/rest/dlf.html --- + <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file @@ -51,6 +52,13 @@ WITH ( ); ``` +- `uri`: The access URI of the DLF REST Catalog Server. +- `warehouse`: DLF Catalog name +- `token.provider`: token provider +- `dlf.access-key-id`: The Access Key ID required to access the DLF service, usually referring to the AccessKey of your + RAM user +- `dlf.access-key-secret`: The Access Key Secret required to access the DLF service + You can grant specific permissions to a RAM user and use the RAM user's access key for long-term access to your DLF resources. Compared to using the Alibaba Cloud account access key, accessing DLF resources with a RAM user access key is more secure. diff --git a/docs/content/program-api/python-api.md b/docs/content/program-api/python-api.md index 579fce109e..1f276a0b48 100644 --- a/docs/content/program-api/python-api.md +++ b/docs/content/program-api/python-api.md @@ -519,6 +519,97 @@ Key points about shard read: - **Parallel Processing**: Each shard can be processed independently for better performance - **Consistency**: Combining all shards should produce the complete table data +## REST API + +### Create Catalog + +The sample code is as follows. 
The detailed meaning of each option can be found in [DLF Token](../concepts/rest/dlf.md). + +```python +from pypaimon import CatalogFactory + +# Note that keys and values are all strings +catalog_options = { + 'metastore': 'rest', + 'warehouse': 'xxx', + 'uri': 'xxx', + 'dlf.region': 'xxx', + 'token.provider': 'xxx', + 'dlf.access-key-id': 'xxx', + 'dlf.access-key-secret': 'xxx' +} +catalog = CatalogFactory.create(catalog_options) +``` + +### Write And Read + +Write and read operations with RESTCatalog are exactly the same as those with FileSystemCatalog. + +```python +import pyarrow as pa +from pypaimon.api.options import Options +from pypaimon.catalog.catalog_context import CatalogContext +from pypaimon.catalog.rest.rest_catalog import RESTCatalog +from pypaimon.schema.schema import Schema + + +def write_test_table(table): + write_builder = table.new_batch_write_builder() + + # first write + table_write = write_builder.new_write() + table_commit = write_builder.new_commit() + data1 = { + 'user_id': [1, 2, 3, 4], + 'item_id': [1001, 1002, 1003, 1004], + 'behavior': ['a', 'b', 'c', 'd'], + 'dt': ['12', '34', '56', '78'], + } + pa_table = pa.Table.from_pydict(data1, schema=pa_schema) + table_write.write_arrow(pa_table) + table_commit.commit(table_write.prepare_commit()) + table_write.close() + table_commit.close() + + +def read_test_table(read_builder): + table_read = read_builder.new_read() + splits = read_builder.new_scan().plan().splits() + return table_read.to_arrow(splits) + + +options = { + 'metastore': 'rest', + 'warehouse': 'xxx', + 'uri': 'xxx', + 'dlf.region': 'xxx', + 'token.provider': 'xxx', + 'dlf.access-key-id': 'xxx', + 'dlf.access-key-secret': 'xxx' +} + +rest_catalog = RESTCatalog(CatalogContext.create_from_options(Options(options))) +print("rest catalog create success") +pa_schema = pa.schema([ + ('user_id', pa.int32()), + ('item_id', pa.int64()), + ('behavior', pa.string()), + ('dt', pa.string()), +]) + +# test parquet append only read +schema = 
Schema.from_pyarrow_schema(pa_schema, partition_keys=['dt']) +rest_catalog.create_table('default.test_t', schema, True) +table = rest_catalog.get_table('default.test_t') +write_test_table(table) +print("write success") + +read_builder = table.new_read_builder() +actual = read_test_table(read_builder) +print("read data:") +print(actual) +``` + ## Data Types | Python Native Type | PyArrow Type | Paimon Type | @@ -557,4 +648,5 @@ Key points about shard read: | f.contains(literal) | PredicateBuilder.contains(f, literal) | | f is in [l1, l2] | PredicateBuilder.is_in(f, [l1, l2]) | | f is not in [l1, l2] | PredicateBuilder.is_not_in(f, [l1, l2]) | -| lower <= f <= upper | PredicateBuilder.between(f, lower, upper) | \ No newline at end of file +| lower <= f <= upper | PredicateBuilder.between(f, lower, upper) | +
