(fluss) branch main updated: [build] Update docker image dependencies for flink/fluss iceberg quickstart (#1813)

yuxia Mon, 20 Oct 2025 20:27:31 -0700

This is an automated email from the ASF dual-hosted git repository.

yuxia pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluss.git



The following commit(s) were added to refs/heads/main by this push:
     new 258fb8183 [build] Update docker image dependencies for flink/fluss iceberg quickstart (#1813)
258fb8183 is described below

commit 258fb81838fce584eee38f4df33ceb4afa52cdf3
Author: MehulBatra <[email protected]>
AuthorDate: Tue Oct 21 08:57:23 2025 +0530

    [build] Update docker image dependencies for flink/fluss iceberg quickstart (#1813)
    
    ---------
    Co-authored-by: luoyuxia <[email protected]>
---
 docker/quickstart-flink/prepare_build.sh | 53 ++++++++++++++++++++++++--------
 fluss-lake/fluss-lake-iceberg/pom.xml    | 19 ++++++++++++
 website/docs/quickstart/flink-iceberg.md | 52 ++++++++++++++++---------------
 3 files changed, 86 insertions(+), 38 deletions(-)

diff --git a/docker/quickstart-flink/prepare_build.sh b/docker/quickstart-flink/prepare_build.sh
index f73d4cbf2..222cf35f3 100755
--- a/docker/quickstart-flink/prepare_build.sh
+++ b/docker/quickstart-flink/prepare_build.sh
@@ -109,6 +109,7 @@ check_prerequisites() {
     local required_dirs=(
         "$PROJECT_ROOT/fluss-flink/fluss-flink-1.20/target"
         "$PROJECT_ROOT/fluss-lake/fluss-lake-paimon/target"
+        "$PROJECT_ROOT/fluss-lake/fluss-lake-iceberg/target"
         "$PROJECT_ROOT/fluss-flink/fluss-flink-tiering/target"
     )
 
@@ -140,6 +141,7 @@ main() {
     log_info "Copying Fluss connector JARs..."
     copy_jar "$PROJECT_ROOT/fluss-flink/fluss-flink-1.20/target/fluss-flink-1.20-*.jar" "./lib" "fluss-flink-1.20 connector"
     copy_jar "$PROJECT_ROOT/fluss-lake/fluss-lake-paimon/target/fluss-lake-paimon-*.jar" "./lib" "fluss-lake-paimon connector"
+    copy_jar "$PROJECT_ROOT/fluss-lake/fluss-lake-iceberg/target/fluss-lake-iceberg-*.jar" "./lib" "fluss-lake-iceberg connector"
 
     # Download external dependencies
     log_info "Downloading external dependencies..."
@@ -151,12 +153,12 @@ main() {
         "" \
         "flink-faker-0.5.3"
 
-    # Download flink-shaded-hadoop-2-uber for Hadoop integration
+    # Download Hadoop for HDFS/local filesystem support
     download_jar \
         "https://repo1.maven.org/maven2/io/trino/hadoop/hadoop-apache/3.3.5-2/hadoop-apache-3.3.5-2.jar" \
         "./lib/hadoop-apache-3.3.5-2.jar" \
         "508255883b984483a45ca48d5af6365d4f013bb8" \
-        "hadoop-apache-3.3.5-2.jar"
+        "hadoop-apache-3.3.5-2"
 
     # Download paimon-flink connector
     download_jar \
@@ -165,13 +167,24 @@ main() {
         "b9f8762c6e575f6786f1d156a18d51682ffc975c" \
         "paimon-flink-1.20-1.2.0"
 
+    # Iceberg Support
+    log_info "Downloading Iceberg connector JARs..."
+
+    # Download iceberg-flink-runtime for Flink 1.20 (version 1.9.1)
+    download_jar \
+        "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-flink-runtime-1.20/1.9.1/iceberg-flink-runtime-1.20-1.9.1.jar" \
+        "./lib/iceberg-flink-runtime-1.20-1.9.1.jar" \
+        "" \
+        "iceberg-flink-runtime-1.20-1.9.1"
+
+
     # Prepare lake tiering JAR
     log_info "Preparing lake tiering JAR..."
     copy_jar "$PROJECT_ROOT/fluss-flink/fluss-flink-tiering/target/fluss-flink-tiering-*.jar" "./opt" "fluss-flink-tiering"
 
     # Final verification
     verify_jars
-    
+
     # Show summary
     show_summary
 }
@@ -179,34 +192,36 @@ main() {
 # Verify that all required JAR files are present
 verify_jars() {
     log_info "Verifying all required JAR files are present..."
-    
+
     local missing_jars=()
     local lib_jars=(
         "fluss-flink-1.20-*.jar"
         "fluss-lake-paimon-*.jar"
+        "fluss-lake-iceberg-*.jar"
         "flink-faker-0.5.3.jar"
         "hadoop-apache-3.3.5-2.jar"
         "paimon-flink-1.20-1.2.0.jar"
+        "iceberg-flink-runtime-1.20-1.9.1.jar"
     )
-    
+
     local opt_jars=(
         "fluss-flink-tiering-*.jar"
     )
-    
+
     # Check lib directory
     for jar_pattern in "${lib_jars[@]}"; do
         if ! ls ./lib/$jar_pattern >/dev/null 2>&1; then
             missing_jars+=("lib/$jar_pattern")
         fi
     done
-    
+
     # Check opt directory
     for jar_pattern in "${opt_jars[@]}"; do
         if ! ls ./opt/$jar_pattern >/dev/null 2>&1; then
             missing_jars+=("opt/$jar_pattern")
         fi
     done
-    
+
     # Report results
     if [ ${#missing_jars[@]} -eq 0 ]; then
         log_success "All required JAR files are present!"
@@ -224,11 +239,23 @@ show_summary() {
     log_success "JAR files preparation completed!"
     echo ""
     log_info "📦 Generated JAR files:"
-    echo "Lib directory:"
-    ls -la ./lib/ 2>/dev/null || echo "  (empty)"
-    echo "Opt directory:"
-    ls -la ./opt/ 2>/dev/null || echo "  (empty)"
+    echo ""
+    echo "Lib directory (Flink connectors):"
+    ls -lh ./lib/ | tail -n +2 | awk '{printf "  %-50s %8s\n", $9, $5}'
+    echo ""
+    echo "Opt directory (Tiering service):"
+    ls -lh ./opt/ | tail -n +2 | awk '{printf "  %-50s %8s\n", $9, $5}'
+    echo ""
+    log_info "📋 Included Components:"
+    echo "  ✓ Fluss Flink 1.20 connector"
+    echo "  ✓ Fluss Lake Paimon connector"
+    echo "  ✓ Fluss Lake Iceberg connector"
+    echo "  ✓ Iceberg Flink runtime 1.20 (v1.9.1)"
+    echo "  ✓ Paimon Flink 1.20 (v1.2.0)"
+    echo "  ✓ Hadoop Apache (v3.3.5-2)"
+    echo "  ✓ Flink Faker (v0.5.3)"
+    echo "  ✓ Fluss Tiering service"
 }
 
 # Run main function
-main "$@"
+main "$@"
\ No newline at end of file
diff --git a/fluss-lake/fluss-lake-iceberg/pom.xml b/fluss-lake/fluss-lake-iceberg/pom.xml
index 94bca76b1..1c13e9db3 100644
--- a/fluss-lake/fluss-lake-iceberg/pom.xml
+++ b/fluss-lake/fluss-lake-iceberg/pom.xml
@@ -318,12 +318,31 @@
                                     <include>*:*</include>
                                 </includes>
                             </artifactSet>
+                            <relocations>
+                                <!-- Shade Iceberg to Fluss namespace for complete isolation -->
+                                <relocation>
+                                    <pattern>org.apache.iceberg</pattern>
+                                    <shadedPattern>org.apache.fluss.lake.iceberg.shaded.org.apache.iceberg</shadedPattern>
+                                </relocation>
+                                <!-- Shade Jackson to Fluss namespace -->
+                                <relocation>
+                                    <pattern>com.fasterxml.jackson</pattern>
+                                    <shadedPattern>org.apache.fluss.lake.iceberg.shaded.com.fasterxml.jackson</shadedPattern>
+                                </relocation>
+                                <!-- Shade Parquet to Fluss namespace -->
+                                <relocation>
+                                    <pattern>org.apache.parquet</pattern>
+                                    <shadedPattern>org.apache.fluss.lake.iceberg.shaded.org.apache.parquet</shadedPattern>
+                                </relocation>
+                            </relocations>
                             <filters>
                                 <filter>
                                     <artifact>*</artifact>
                                     <excludes>
                                         <exclude>LICENSE</exclude>
                                         <exclude>NOTICE</exclude>
+                                        <!-- Exclude multi-release JAR versions that cause shading issues -->
+                                        <exclude>META-INF/versions/**</exclude>
                                     </excludes>
                                 </filter>
                             </filters>
diff --git a/website/docs/quickstart/flink-iceberg.md b/website/docs/quickstart/flink-iceberg.md
index 7a3ac8789..26d44ada4 100644
--- a/website/docs/quickstart/flink-iceberg.md
+++ b/website/docs/quickstart/flink-iceberg.md
@@ -63,7 +63,6 @@ services:
 
   coordinator-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
-    command: coordinatorServer
     depends_on:
       - zookeeper
     environment:
@@ -401,8 +400,7 @@ CREATE TABLE datalake_enriched_orders (
     `cust_phone` STRING,
     `cust_acctbal` DECIMAL(15, 2),
     `cust_mktsegment` STRING,
-    `nation_name` STRING,
-    PRIMARY KEY (`order_key`) NOT ENFORCED
+    `nation_name` STRING
 ) WITH (
     'table.datalake.enabled' = 'true',
     'table.datalake.freshness' = '30s'
@@ -429,11 +427,14 @@ SELECT o.order_key,
        c.acctbal,
        c.mktsegment,
        n.name
-FROM fluss_order o
-       LEFT JOIN fluss_customer FOR SYSTEM_TIME AS OF `o`.`ptime` AS `c`
-                 ON o.cust_key = c.cust_key
-       LEFT JOIN fluss_nation FOR SYSTEM_TIME AS OF `o`.`ptime` AS `n`
-                 ON c.nation_key = n.nation_key;
+FROM (
+    SELECT *, PROCTIME() as ptime
+    FROM `default_catalog`.`default_database`.source_order
+) o
+LEFT JOIN fluss_customer FOR SYSTEM_TIME AS OF o.ptime AS c
+    ON o.cust_key = c.cust_key
+LEFT JOIN fluss_nation FOR SYSTEM_TIME AS OF o.ptime AS n
+    ON c.nation_key = n.nation_key;
 ```
 
 ### Real-Time Analytics on Fluss datalake-enabled Tables
@@ -459,11 +460,12 @@ SELECT snapshot_id, operation FROM datalake_enriched_orders$lake$snapshots;
 
 **Sample Output:**
 ```shell
-+-------------+--------------------+
-| snapshot_id |          operation |
-+-------------+--------------------+
-|           1 |             append |
-+-------------+--------------------+
++---------------------+-----------+
+|         snapshot_id | operation |
++---------------------+-----------+
+| 7792523713868625335 |    append |
+| 7960217942125627573 |    append |
++---------------------+-----------+
 ```
 **Note:** Make sure to wait for the configured `datalake.freshness` (~30s) to complete before querying the snapshots, otherwise the result will be empty.
 
@@ -474,11 +476,11 @@ SELECT sum(total_price) as sum_price FROM datalake_enriched_orders$lake;
 ```
 **Sample Output:**
 ```shell
-+------------+
-|  sum_price |
-+------------+
-| 1669519.92 |
-+------------+
++-----------+
+| sum_price |
++-----------+
+| 432880.93 |
++-----------+
 ```
 
 To achieve results with sub-second data freshness, you can query the table directly, which seamlessly unifies data from both Fluss and Iceberg:
@@ -490,23 +492,23 @@ SELECT sum(total_price) as sum_price FROM datalake_enriched_orders;
 
 **Sample Output:**
 ```shell
-+------------+
-|  sum_price |
-+------------+
-| 1777908.36 |
-+------------+
++-----------+
+| sum_price |
++-----------+
+| 558660.03 |
++-----------+
 ```
 
 You can execute the real-time analytics query multiple times, and the results will vary with each run as new data is continuously written to Fluss in real-time.
 
 Finally, you can use the following command to view the files stored in Iceberg:
 ```shell
-docker compose exec taskmanager tree /tmp/iceberg/fluss.db
+docker compose exec taskmanager tree /tmp/iceberg/fluss
 ```
 
 **Sample Output:**
 ```shell
-/tmp/iceberg/fluss.db
+/tmp/iceberg/fluss
 └── datalake_enriched_orders
     ├── data
     │   └── 00000-0-abc123.parquet

(fluss) branch main updated: [build] Update docker image dependencies for flink/fluss iceberg quickstart (#1813)

Reply via email to