This is an automated email from the ASF dual-hosted git repository.
dimas pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git
The following commit(s) were added to refs/heads/main by this push:
new 596239cdb feat(docs): Add Getting Started guide for MinIO (#2227)
596239cdb is described below
commit 596239cdba8b18431aa9a0e5ec2f317fb8365b2b
Author: Dmitri Bourlatchkov <[email protected]>
AuthorDate: Fri Aug 15 10:54:34 2025 -0400
feat(docs): Add Getting Started guide for MinIO (#2227)
* feat(docs): Add Getting Started guide for MinIO
A simple page of step-by-step instructions for setting
up a local environment with Polaris, MinIO and Spark.
Closes #1530
---
getting-started/assets/polaris/create-catalog.sh | 31 +++---
getting-started/assets/polaris/obtain-token.sh | 37 +++++++
getting-started/minio/README.md | 90 ++++++++++++++++
getting-started/minio/docker-compose.yml | 112 ++++++++++++++++++++
.../in-dev/unreleased/getting-started/minio.md | 115 +++++++++++++++++++++
5 files changed, 367 insertions(+), 18 deletions(-)
diff --git a/getting-started/assets/polaris/create-catalog.sh
b/getting-started/assets/polaris/create-catalog.sh
index 7d67c169c..77cee447a 100755
--- a/getting-started/assets/polaris/create-catalog.sh
+++ b/getting-started/assets/polaris/create-catalog.sh
@@ -23,23 +23,16 @@ apk add --no-cache jq
realm=${1:-"POLARIS"}
-token=${2:-""}
+TOKEN=${2:-""}
-if [ -z "$token" ]; then
- token=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \
- --user ${CLIENT_ID}:${CLIENT_SECRET} \
- -H "Polaris-Realm: $realm" \
- -d grant_type=client_credentials \
- -d scope=PRINCIPAL_ROLE:ALL | jq -r .access_token)
+BASEDIR=$(dirname $0)
- if [ -z "${token}" ]; then
- echo "Failed to obtain access token."
- exit 1
- fi
+if [ -z "$TOKEN" ]; then
+ source $BASEDIR/obtain-token.sh
fi
echo
-echo "Obtained access token: ${token}"
+echo "Obtained access token: ${TOKEN}"
STORAGE_TYPE="FILE"
if [ -z "${STORAGE_LOCATION}" ]; then
@@ -57,12 +50,14 @@ else
echo "Using StorageType: $STORAGE_TYPE"
fi
-STORAGE_CONFIG_INFO="{\"storageType\": \"$STORAGE_TYPE\",
\"allowedLocations\": [\"$STORAGE_LOCATION\"]}"
+if [ -z "${STORAGE_CONFIG_INFO}" ]; then
+ STORAGE_CONFIG_INFO="{\"storageType\": \"$STORAGE_TYPE\",
\"allowedLocations\": [\"$STORAGE_LOCATION\"]}"
-if [[ "$STORAGE_TYPE" == "S3" ]]; then
- STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn
"$AWS_ROLE_ARN" '. + {roleArn: $roleArn}')
-elif [[ "$STORAGE_TYPE" == "AZURE" ]]; then
- STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg tenantId
"$AZURE_TENANT_ID" '. + {tenantId: $tenantId}')
+ if [[ "$STORAGE_TYPE" == "S3" ]]; then
+ STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg roleArn
"$AWS_ROLE_ARN" '. + {roleArn: $roleArn}')
+ elif [[ "$STORAGE_TYPE" == "AZURE" ]]; then
+ STORAGE_CONFIG_INFO=$(echo "$STORAGE_CONFIG_INFO" | jq --arg tenantId
"$AZURE_TENANT_ID" '. + {tenantId: $tenantId}')
+ fi
fi
echo
@@ -82,7 +77,7 @@ PAYLOAD='{
echo $PAYLOAD
-curl -s -H "Authorization: Bearer ${token}" \
+curl -s -H "Authorization: Bearer ${TOKEN}" \
-H 'Accept: application/json' \
-H 'Content-Type: application/json' \
-H "Polaris-Realm: $realm" \
diff --git a/getting-started/assets/polaris/obtain-token.sh
b/getting-started/assets/polaris/obtain-token.sh
new file mode 100755
index 000000000..a0d51776b
--- /dev/null
+++ b/getting-started/assets/polaris/obtain-token.sh
@@ -0,0 +1,37 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+
+apk add --no-cache jq
+
+realm=${1:-"POLARIS"}
+
+TOKEN=$(curl -s http://polaris:8181/api/catalog/v1/oauth/tokens \
+ --user ${CLIENT_ID}:${CLIENT_SECRET} \
+ -H "Polaris-Realm: $realm" \
+ -d grant_type=client_credentials \
+ -d scope=PRINCIPAL_ROLE:ALL | jq -r .access_token)
+
+if [ -z "${TOKEN}" ]; then
+ echo "Failed to obtain access token."
+ exit 1
+fi
+
+export TOKEN
diff --git a/getting-started/minio/README.md b/getting-started/minio/README.md
new file mode 100644
index 000000000..5b4271458
--- /dev/null
+++ b/getting-started/minio/README.md
@@ -0,0 +1,90 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Getting Started with Apache Polaris and MinIO
+
+## Overview
+
+This example uses MinIO as a storage provider with Polaris.
+
+Spark is used as a query engine. This example assumes a local Spark
installation.
+See the [Spark Notebooks Example](../spark/README.md) for a more advanced
Spark setup.
+
+## Starting the Example
+
+1. Build the Polaris server image if it's not already present locally:
+
+ ```shell
+ ./gradlew \
+ :polaris-server:assemble \
+ :polaris-server:quarkusAppPartsBuild --rerun \
+ -Dquarkus.container-image.build=true
+ ```
+
+2. Start the docker compose group by running the following command from the
root of the repository:
+
+ ```shell
+ docker compose -f getting-started/minio/docker-compose.yml up
+ ```
+
+## Connecting From Spark
+
+```shell
+bin/spark-sql \
+ --packages
org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.0,org.apache.iceberg:iceberg-aws-bundle:1.9.0
\
+ --conf
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
\
+ --conf spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog \
+ --conf spark.sql.catalog.polaris.type=rest \
+ --conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \
+ --conf spark.sql.catalog.polaris.token-refresh-enabled=false \
+ --conf spark.sql.catalog.polaris.warehouse=quickstart_catalog \
+ --conf spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL \
+ --conf
spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials
\
+ --conf spark.sql.catalog.polaris.credential=root:s3cr3t
+```
+
+Note: `s3cr3t` is defined as the password for the `root` user in the
`docker-compose.yml` file.
+
+## Running Queries
+
+Run inside the Spark SQL shell:
+
+```
+spark-sql (default)> use polaris;
+Time taken: 0.837 seconds
+
+spark-sql ()> create namespace ns;
+Time taken: 0.374 seconds
+
+spark-sql ()> create table ns.t1 as select 'abc';
+Time taken: 2.192 seconds
+
+spark-sql ()> select * from ns.t1;
+abc
+Time taken: 0.579 seconds, Fetched 1 row(s)
+```
+
+## MinIO Endpoints
+
+Note that the catalog configuration defined in the `docker-compose.yml`
contains
+different endpoints for the Polaris Server and the client (Spark).
Specifically,
+the client endpoint is `http://localhost:9000`, but `endpointInternal` is
`http://minio:9000`.
+
+This is necessary because clients running on `localhost` do not normally see
service
+names (such as `minio`) that are internal to the docker compose environment.
diff --git a/getting-started/minio/docker-compose.yml
b/getting-started/minio/docker-compose.yml
new file mode 100644
index 000000000..6731476ed
--- /dev/null
+++ b/getting-started/minio/docker-compose.yml
@@ -0,0 +1,112 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+services:
+
+ minio:
+ image: quay.io/minio/minio:latest
+ ports:
+ # API port
+ - "9000:9000"
+ # UI port
+ - "9001:9001"
+ environment:
+ MINIO_ROOT_USER: minio_root
+ MINIO_ROOT_PASSWORD: m1n1opwd
+ command:
+ - "server"
+ - "/data"
+ - "--console-address"
+ - ":9001"
+ healthcheck:
+ test: ["CMD", "curl", "http://127.0.0.1:9000/minio/health/live"]
+ interval: 1s
+ timeout: 10s
+
+ polaris:
+ image: apache/polaris:latest
+ ports:
+ # API port
+ - "8181:8181"
+ # Optional, allows attaching a debugger to the Polaris JVM
+ - "5005:5005"
+ depends_on:
+ minio:
+ condition: service_healthy
+ environment:
+ JAVA_DEBUG: true
+ JAVA_DEBUG_PORT: "*:5005"
+ AWS_REGION: us-west-2
+ AWS_ACCESS_KEY_ID: minio_root
+ AWS_SECRET_ACCESS_KEY: m1n1opwd
+ POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t
+ polaris.realm-context.realms: POLARIS
+ quarkus.otel.sdk.disabled: "true"
+ healthcheck:
+ test: ["CMD", "curl", "http://localhost:8182/q/health"]
+ interval: 2s
+ timeout: 10s
+ retries: 10
+ start_period: 10s
+
+ setup_bucket:
+ image: quay.io/minio/mc:latest
+ depends_on:
+ minio:
+ condition: service_healthy
+ entrypoint: "/bin/sh"
+ command:
+ - "-c"
+ - >-
+ echo Creating MinIO bucket...;
+ mc alias set pol http://minio:9000 minio_root m1n1opwd;
+ mc mb pol/bucket123;
+ mc ls pol;
+ echo Bucket setup complete.;
+
+ polaris-setup:
+ image: alpine/curl
+ depends_on:
+ polaris:
+ condition: service_healthy
+ environment:
+ - CLIENT_ID=root
+ - CLIENT_SECRET=s3cr3t
+ volumes:
+ - ../assets/polaris/:/polaris
+ entrypoint: "/bin/sh"
+ command:
+ - "-c"
+ - >-
+ chmod +x /polaris/create-catalog.sh;
+ chmod +x /polaris/obtain-token.sh;
+ source /polaris/obtain-token.sh;
+ echo Creating catalog...;
+ export STORAGE_CONFIG_INFO='{"storageType":"S3",
+ "endpoint":"http://localhost:9000",
+ "endpointInternal":"http://minio:9000",
+ "pathStyleAccess":true}';
+ export STORAGE_LOCATION='s3://bucket123';
+ /polaris/create-catalog.sh POLARIS $$TOKEN;
+ echo Extra grants...;
+ curl -H "Authorization: Bearer $$TOKEN" -H 'Content-Type:
application/json' \
+ -X PUT \
+
http://polaris:8181/api/management/v1/catalogs/quickstart_catalog/catalog-roles/catalog_admin/grants
\
+ -d '{"type":"catalog", "privilege":"CATALOG_MANAGE_CONTENT"}';
+ echo Done.;
diff --git a/site/content/in-dev/unreleased/getting-started/minio.md
b/site/content/in-dev/unreleased/getting-started/minio.md
new file mode 100644
index 000000000..3eda7db62
--- /dev/null
+++ b/site/content/in-dev/unreleased/getting-started/minio.md
@@ -0,0 +1,115 @@
+---
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+Title: Deploying Polaris on MinIO
+type: docs
+weight: 350
+---
+
+In this guide we walk through setting up a simple Polaris Server with local
[MinIO](https://www.min.io/) storage.
+
+Similar configurations are expected to work with other S3-compatible systems
that also have the
+[STS](https://docs.aws.amazon.com/STS/latest/APIReference/welcome.html) API.
+
+# Setup
+
+Clone the Polaris source repository, then build a docker image for Polaris.
+
+```shell
+./gradlew :polaris-server:assemble -Dquarkus.container-image.build=true
+```
+
+Start MinIO with Polaris using the `docker compose` example.
+
+```shell
+docker compose -f getting-started/minio/docker-compose.yml up
+```
+
+The compose script will start MinIO on default ports (API on 9000, UI on 9001)
+plus a Polaris Server pre-configured to use that MinIO instance.
+
+In this example the `root` principal has its password set to `s3cr3t`.
+
+# Connecting from Spark
+
+Start Spark.
+
+```shell
+export AWS_REGION=us-west-2
+
+bin/spark-sql \
+ --packages
org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.0,org.apache.iceberg:iceberg-aws-bundle:1.9.0
\
+ --conf
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
\
+ --conf spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog \
+ --conf spark.sql.catalog.polaris.type=rest \
+ --conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \
+ --conf spark.sql.catalog.polaris.token-refresh-enabled=false \
+ --conf spark.sql.catalog.polaris.warehouse=quickstart_catalog \
+ --conf spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL \
+ --conf
spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials
\
+ --conf spark.sql.catalog.polaris.credential=root:s3cr3t
+```
+
+Note: `AWS_REGION` is required by the AWS SDK used by Spark, but the value is
irrelevant in this case.
+
+Create a table in Spark.
+
+```sql
+use polaris;
+create namespace ns;
+create table ns.t1 as select 'abc';
+select * from ns.t1;
+```
+
+# Connecting from MinIO client
+
+```shell
+mc alias set pol http://localhost:9000 minio_root m1n1opwd
+mc ls pol/bucket123/ns/t1
+[2025-08-13 18:52:38 EDT] 0B data/
+[2025-08-13 18:52:38 EDT] 0B metadata/
+```
+
+Note: the values of `minio_root`, `m1n1opwd` and `bucket123` are defined in
the docker compose file.
+
+# Notes on Storage Configuration
+
+In this example the Polaris Catalog is defined as (excluding uninteresting
properties):
+
+```json
+ {
+ "name": "quickstart_catalog",
+ "storageConfigInfo": {
+ "endpoint": "http://localhost:9000",
+ "endpointInternal": "http://minio:9000",
+ "pathStyleAccess": true,
+ "storageType": "S3",
+ "allowedLocations": [
+ "s3://bucket123"
+ ]
+ }
+ }
+```
+
+Note that the `roleArn` parameter, which is required for AWS storage, does not
need to be set for MinIO.
+
+Note the two endpoint values. `endpointInternal` is used by the Polaris
Server, while `endpoint` is communicated
+to clients (such as Spark) in Iceberg REST API responses. This distinction
allows the system to work smoothly
+when the clients and the server have different views of the network (in this
example the host name `minio` is
+resolvable only inside the docker compose environment).
\ No newline at end of file