luoyuxia commented on code in PR #2569:
URL: https://github.com/apache/fluss/pull/2569#discussion_r2781678079


##########
website/docs/quickstart/lakehouse.md:
##########


Review Comment:
   with rustfs, we can remove `sed -i 's/exec $(drop_privs_cmd)//g' 
/docker-entrypoint.sh` now. 
   It was introduced to avoid a permission exception when using the local filesystem.



##########
website/docs/quickstart/lakehouse.md:
##########
@@ -38,50 +38,86 @@ cd fluss-quickstart-paimon
 mkdir -p lib opt
 
 # Flink connectors
-wget -O lib/flink-faker-0.5.3.jar 
https://github.com/knaufk/flink-faker/releases/download/v0.5.3/flink-faker-0.5.3.jar
-wget -O "lib/fluss-flink-1.20-$FLUSS_DOCKER_VERSION$.jar" 
"https://repo1.maven.org/maven2/org/apache/fluss/fluss-flink-1.20/$FLUSS_DOCKER_VERSION$/fluss-flink-1.20-$FLUSS_DOCKER_VERSION$.jar";
-wget -O "lib/paimon-flink-1.20-$PAIMON_VERSION$.jar" 
"https://repo1.maven.org/maven2/org/apache/paimon/paimon-flink-1.20/$PAIMON_VERSION$/paimon-flink-1.20-$PAIMON_VERSION$.jar";
+curl -fL -o lib/flink-faker-0.5.3.jar 
https://github.com/knaufk/flink-faker/releases/download/v0.5.3/flink-faker-0.5.3.jar
+curl -fL -o "lib/fluss-flink-1.20-$FLUSS_DOCKER_VERSION$.jar" 
"https://repo1.maven.org/maven2/org/apache/fluss/fluss-flink-1.20/$FLUSS_DOCKER_VERSION$/fluss-flink-1.20-$FLUSS_DOCKER_VERSION$.jar";
+curl -fL -o "lib/paimon-flink-1.20-$PAIMON_VERSION$.jar" 
"https://repo1.maven.org/maven2/org/apache/paimon/paimon-flink-1.20/$PAIMON_VERSION$/paimon-flink-1.20-$PAIMON_VERSION$.jar";
 
 # Fluss lake plugin
-wget -O "lib/fluss-lake-paimon-$FLUSS_DOCKER_VERSION$.jar" 
"https://repo1.maven.org/maven2/org/apache/fluss/fluss-lake-paimon/$FLUSS_DOCKER_VERSION$/fluss-lake-paimon-$FLUSS_DOCKER_VERSION$.jar";
+curl -fL -o "lib/fluss-lake-paimon-$FLUSS_DOCKER_VERSION$.jar" 
"https://repo1.maven.org/maven2/org/apache/fluss/fluss-lake-paimon/$FLUSS_DOCKER_VERSION$/fluss-lake-paimon-$FLUSS_DOCKER_VERSION$.jar";
 
 # Paimon bundle jar
-wget -O "lib/paimon-bundle-$PAIMON_VERSION$.jar" 
"https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-bundle/$PAIMON_VERSION$/paimon-bundle-$PAIMON_VERSION$.jar";
+curl -fL -o "lib/paimon-bundle-$PAIMON_VERSION$.jar" 
"https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-bundle/$PAIMON_VERSION$/paimon-bundle-$PAIMON_VERSION$.jar";
 
 # Hadoop bundle jar
-wget -O lib/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar 
https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
+curl -fL -o lib/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar 
https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
+
+# AWS S3 support
+curl -fL -o "lib/paimon-s3-$PAIMON_VERSION$.jar" 
"https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-s3/$PAIMON_VERSION$/paimon-s3-$PAIMON_VERSION$.jar";
 
 # Tiering service
-wget -O "opt/fluss-flink-tiering-$FLUSS_DOCKER_VERSION$.jar" 
"https://repo1.maven.org/maven2/org/apache/fluss/fluss-flink-tiering/$FLUSS_DOCKER_VERSION$/fluss-flink-tiering-$FLUSS_DOCKER_VERSION$.jar";
+curl -fL -o "opt/fluss-flink-tiering-$FLUSS_DOCKER_VERSION$.jar" 
"https://repo1.maven.org/maven2/org/apache/fluss/fluss-flink-tiering/$FLUSS_DOCKER_VERSION$/fluss-flink-tiering-$FLUSS_DOCKER_VERSION$.jar";
 ```
 
 :::info
 You can add more jars to this `lib` directory based on your requirements:
-- **Cloud storage support**: For AWS S3 integration with Paimon, add the 
corresponding 
[paimon-s3](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-s3/$PAIMON_VERSION$/paimon-s3-$PAIMON_VERSION$.jar)
 - **Other catalog backends**: Add jars needed for alternative Paimon catalog 
implementations (e.g., Hive, JDBC)
   :::
 
 3. Create a `docker-compose.yml` file with the following content:
 
 ```yaml
 services:
+  #begin RustFS (S3-compatible storage)
+  rustfs:
+    image: rustfs/rustfs:latest
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    environment:
+      - RUSTFS_ACCESS_KEY=rustfsadmin
+      - RUSTFS_SECRET_KEY=rustfsadmin
+      - RUSTFS_CONSOLE_ENABLE=true
+    volumes:
+      - rustfs-data:/data
+    command: /data
+  rustfs-init:
+    image: minio/mc
+    depends_on:
+      - rustfs
+    entrypoint: >
+      /bin/sh -c "
+      until mc alias set rustfs http://rustfs:9000 rustfsadmin rustfsadmin; do
+        echo 'Waiting for RustFS...';
+        sleep 1;
+      done;
+      mc mb --ignore-existing rustfs/fluss;
+      "
+  #end
   coordinator-server:
     image: apache/fluss:$FLUSS_DOCKER_VERSION$
     command: coordinatorServer
     depends_on:
       - zookeeper
+      - rustfs-init
     environment:
       - |
         FLUSS_PROPERTIES=
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://coordinator-server:9123
-        remote.data.dir: /tmp/fluss/remote-data
+        remote.data.dir: s3://fluss/remote-data
+        s3.endpoint: http://rustfs:9000
+        s3.access-key: rustfsadmin
+        s3.secret-key: rustfsadmin
+        s3.path.style.access: true
         datalake.format: paimon
         datalake.paimon.metastore: filesystem
-        datalake.paimon.warehouse: /tmp/paimon
+        datalake.paimon.warehouse: s3://fluss/paimon
+        datalake.paimon.s3.endpoint: http://rustfs:9000
+        datalake.paimon.s3.access-key: rustfsadmin
+        datalake.paimon.s3.secret-key: rustfsadmin
+        datalake.paimon.s3.path.style.access: true
     volumes:
-      - shared-tmpfs:/tmp/paimon
-      - shared-tmpfs:/tmp/fluss
+      - ./lib/paimon-s3-1.3.1.jar:/opt/fluss/plugins/paimon/paimon-s3-1.3.1.jar

Review Comment:
   use `$PAIMON_VERSION$`



##########
website/docs/quickstart/lakehouse.md:
##########


Review Comment:
   with rustfs, we can remove sed -i 's/exec $(drop_privs_cmd)//g' 
/docker-entrypoint.sh now.
   It was introduced to avoid a permission exception when using the local filesystem.



##########
website/docs/quickstart/lakehouse.md:
##########
@@ -276,12 +358,14 @@ services:
        cp /tmp/opt/*.jar /opt/flink/opt/ 2>/dev/null || true;
        /docker-entrypoint.sh jobmanager"
     environment:
+      - AWS_ACCESS_KEY_ID=rustfsadmin

Review Comment:
   Curious about why we need it in the environment?



##########
website/docs/quickstart/lakehouse.md:
##########


Review Comment:
   The same to iceberg part.



##########
website/docs/quickstart/lakehouse.md:
##########
@@ -888,6 +997,75 @@ The files adhere to Iceberg's standard format, enabling 
seamless querying with o
   </TabItem>
 </Tabs>
 
+### Tiered Storage

Review Comment:
   I just found that the output may be too much. I think if users want to inspect 
the paimon/iceberg data, they can just go to the RustFS console to see it?
   Can we just remove this part?



##########
website/docs/quickstart/lakehouse.md:
##########
@@ -93,14 +129,21 @@ services:
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://tablet-server:9123
         data.dir: /tmp/fluss/data
-        remote.data.dir: /tmp/fluss/remote-data
+        remote.data.dir: s3://fluss/remote-data
+        s3.endpoint: http://rustfs:9000
+        s3.access-key: rustfsadmin
+        s3.secret-key: rustfsadmin
+        s3.path.style.access: true
         kv.snapshot.interval: 0s
         datalake.format: paimon
         datalake.paimon.metastore: filesystem
-        datalake.paimon.warehouse: /tmp/paimon
+        datalake.paimon.warehouse: s3://fluss/paimon
+        datalake.paimon.s3.endpoint: http://rustfs:9000
+        datalake.paimon.s3.access-key: rustfsadmin
+        datalake.paimon.s3.secret-key: rustfsadmin
+        datalake.paimon.s3.path.style.access: true
     volumes:
-      - shared-tmpfs:/tmp/paimon
-      - shared-tmpfs:/tmp/fluss
+      - ./lib/paimon-s3-1.3.1.jar:/opt/fluss/plugins/paimon/paimon-s3-1.3.1.jar

Review Comment:
   ditto



##########
website/docs/quickstart/lakehouse.md:
##########
@@ -224,25 +264,62 @@ You can add more jars to this `lib` directory based on 
your requirements:
 
 ```yaml
 services:
+  #begin RustFS (S3-compatible storage)
+  rustfs:
+    image: rustfs/rustfs:latest
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    environment:
+      - RUSTFS_ACCESS_KEY=rustfsadmin
+      - RUSTFS_SECRET_KEY=rustfsadmin
+      - RUSTFS_CONSOLE_ENABLE=true
+    volumes:
+      - rustfs-data:/data
+    command: /data
+  rustfs-init:
+    image: minio/mc
+    depends_on:
+      - rustfs
+    entrypoint: >
+      /bin/sh -c "
+      until mc alias set rustfs http://rustfs:9000 rustfsadmin rustfsadmin; do
+        echo 'Waiting for RustFS...';
+        sleep 1;
+      done;
+      mc mb --ignore-existing rustfs/fluss;
+      "
+  #end
   coordinator-server:
     image: apache/fluss:$FLUSS_DOCKER_VERSION$
     command: coordinatorServer
     depends_on:
       - zookeeper
+      - rustfs-init
     environment:
       - |
         FLUSS_PROPERTIES=
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://coordinator-server:9123
-        remote.data.dir: /tmp/fluss/remote-data
+        remote.data.dir: s3://fluss/remote-data
+        s3.endpoint: http://rustfs:9000
+        s3.access-key: rustfsadmin
+        s3.secret-key: rustfsadmin
+        s3.path.style.access: true
         datalake.format: iceberg
         datalake.iceberg.type: hadoop
-        datalake.iceberg.warehouse: /tmp/iceberg
+        datalake.iceberg.warehouse: s3a://fluss/iceberg
+        datalake.iceberg.iceberg.hadoop.fs.s3a.endpoint: http://rustfs:9000
+        datalake.iceberg.iceberg.hadoop.fs.s3a.access.key: rustfsadmin
+        datalake.iceberg.iceberg.hadoop.fs.s3a.secret.key: rustfsadmin
+        datalake.iceberg.iceberg.hadoop.fs.s3a.path.style.access: true
     volumes:
-      - shared-tmpfs:/tmp/iceberg
-      - shared-tmpfs:/tmp/fluss
       - 
./lib/fluss-lake-iceberg-$FLUSS_DOCKER_VERSION$.jar:/opt/fluss/plugins/iceberg/fluss-lake-iceberg-$FLUSS_DOCKER_VERSION$.jar
-      - 
./lib/hadoop-apache-3.3.5-2.jar:/opt/fluss/plugins/iceberg/hadoop-apache-3.3.5-2.jar
+      - 
./lib/hadoop-client-api-3.3.5.jar:/opt/fluss/plugins/iceberg/hadoop-client-api-3.3.5.jar

Review Comment:
   Just wondering: can `hadoop-apache-3.3.5-2.jar` replace 
   ```
   hadoop-client-api
   hadoop-client-runtime
   commons-logging
   ```
   ?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to