This is an automated email from the ASF dual-hosted git repository.

peacewong pushed a commit to branch dev-1.4.0
in repository https://gitbox.apache.org/repos/asf/linkis.git


The following commit(s) were added to refs/heads/dev-1.4.0 by this push:
     new e09e89706 spark etl support datalake and add scala-2.12 module (#4538)
e09e89706 is described below

commit e09e89706dde69bf8a06c34aa97e30afa8047d5f
Author: ChengJie1053 <[email protected]>
AuthorDate: Fri May 19 11:38:15 2023 +0800

    spark etl support datalake and add scala-2.12 module (#4538)
---
 docs/configuration/spark.md                        |   1 +
 linkis-engineconn-plugins/spark/scala-2.12/pom.xml | 167 +++++++++++++++++++++
 .../scala-2.12/src/main/assembly/distribution.xml  |  44 ++++++
 .../scala-2.12/src/test/resources/etltest.dolphin  |   4 +
 .../spark/datacalc/TestDeltaCalc.scala             | 141 +++++++++++++++++
 .../engineplugin/spark/datacalc/TestHudiCalc.scala | 146 ++++++++++++++++++
 .../spark/datacalc/sink/DataLakeSinkConfig.java    |  71 +++++++++
 .../datacalc/source/DataLakeSourceConfig.java      |  56 +++++++
 .../spark/datacalc/util/PluginUtil.java            |   2 +
 .../spark/config/SparkConfiguration.scala          |   2 +
 .../spark/datacalc/sink/DataLakeSink.scala         |  37 +++++
 .../spark/datacalc/source/DataLakeSource.scala     |  41 +++++
 .../spark/factory/SparkEngineConnFactory.scala     |   4 +
 13 files changed, 716 insertions(+)
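
In short: this change registers a new "datalake" plugin (backed by Delta Lake or Apache Hudi) with the Spark data-calc ETL framework, and adds a scala-2.12 build module that pulls in the matching delta-core and hudi-spark bundles. As a quick orientation, a minimal job driving the new sink looks like the sketch below, mirroring the tests added in this commit (file paths are illustrative):

    import org.apache.linkis.engineplugin.spark.datacalc.DataCalcExecution
    import org.apache.linkis.engineplugin.spark.datacalc.model.DataCalcGroupData

    // Read a semicolon-delimited CSV and write it out as a Delta table
    // through the new "datalake" sink.
    val configJson =
      """
        |{
        |    "sources": [{
        |        "name": "file", "type": "source",
        |        "config": {
        |            "resultTable": "t1",
        |            "path": "file:///tmp/etltest.dolphin",
        |            "serializer": "csv",
        |            "options": {"header": "true", "delimiter": ";"},
        |            "columnNames": ["name", "age"]
        |        }
        |    }],
        |    "sinks": [{
        |        "name": "datalake",
        |        "config": {
        |            "sourceTable": "t1",
        |            "tableFormat": "delta",
        |            "path": "file:///tmp/delta",
        |            "saveMode": "overwrite"
        |        }
        |    }]
        |}
        |""".stripMargin

    val data = DataCalcGroupData.getData(configJson)
    val (sources, transforms, sinks) = DataCalcExecution.getPlugins(data)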

diff --git a/docs/configuration/spark.md b/docs/configuration/spark.md
index 99c07a12b..ed2024b98 100644
--- a/docs/configuration/spark.md
+++ b/docs/configuration/spark.md
@@ -3,6 +3,7 @@
 
 | Module Name (Service Name) | Parameter Name | Default Value | Description |Used|
 | -------- | -------- | ----- |----- |  -----   |
+|spark|linkis.spark.etl.support.hudi|false|spark.etl.support.hudi|
 |spark|linkis.bgservice.store.prefix|hdfs:///tmp/bdp-ide/|bgservice.store.prefix|
 |spark|linkis.bgservice.store.suffix|  |bgservice.store.suffix|
 |spark|wds.linkis.dolphin.decimal.precision|32 |dolphin.decimal.precision|
diff --git a/linkis-engineconn-plugins/spark/scala-2.12/pom.xml b/linkis-engineconn-plugins/spark/scala-2.12/pom.xml
new file mode 100644
index 000000000..aa8f5b75a
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/scala-2.12/pom.xml
@@ -0,0 +1,167 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~ 
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~ 
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.linkis</groupId>
+    <artifactId>linkis</artifactId>
+    <version>${revision}</version>
+    <relativePath>../../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>linkis-engineplugin-spark-scala-2.12</artifactId>
+
+  <properties>
+    <scala.binary.version>2.12</scala.binary.version>
+    <delta.version>2.0.2</delta.version>
+    <hudi.version>0.13.0</hudi.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.linkis</groupId>
+      <artifactId>linkis-engineplugin-spark</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>${hadoop-hdfs-client.artifact}</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-annotations</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-databind</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-core-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-mapper-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-client</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.google.protobuf</groupId>
+          <artifactId>protobuf-java</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.janino</groupId>
+          <artifactId>commons-compiler</artifactId>
+        </exclusion>
+
+        <exclusion>
+          <groupId>org.codehaus.janino</groupId>
+          <artifactId>janino</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.codehaus.janino</groupId>
+      <artifactId>janino</artifactId>
+      <version>3.0.9</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>io.delta</groupId>
+      <artifactId>delta-core_${scala.binary.version}</artifactId>
+      <version>${delta.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-spark3.2-bundle_${scala.binary.version}</artifactId>
+      <version>${hudi.version}</version>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+        <excludes>
+          <exclude>**/*.properties</exclude>
+          <exclude>**/log4j2.xml</exclude>
+        </excludes>
+      </resource>
+    </resources>
+    <plugins>
+      <plugin>
+        <groupId>net.alchim31.maven</groupId>
+        <artifactId>scala-maven-plugin</artifactId>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <inherited>false</inherited>
+        <configuration>
+          <skipAssembly>false</skipAssembly>
+          <finalName>out</finalName>
+          <appendAssemblyId>false</appendAssemblyId>
+          <attach>false</attach>
+          <descriptors>
+            <descriptor>src/main/assembly/distribution.xml</descriptor>
+          </descriptors>
+        </configuration>
+        <executions>
+          <execution>
+            <id>make-assembly</id>
+            <goals>
+              <goal>single</goal>
+            </goals>
+            <phase>package</phase>
+            <configuration>
+              <descriptors>
+                <descriptor>src/main/assembly/distribution.xml</descriptor>
+              </descriptors>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/linkis-engineconn-plugins/spark/scala-2.12/src/main/assembly/distribution.xml b/linkis-engineconn-plugins/spark/scala-2.12/src/main/assembly/distribution.xml
new file mode 100644
index 000000000..c67c3b3e8
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/scala-2.12/src/main/assembly/distribution.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~ 
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~ 
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.1.1 https://maven.apache.org/xsd/assembly-2.1.1.xsd">
+    <id>linkis-enginePlugin-spark</id>
+    <formats>
+        <format>dir</format>
+        <format>zip</format>
+    </formats>
+    <includeBaseDirectory>true</includeBaseDirectory>
+    <baseDirectory>spark</baseDirectory>
+
+    <dependencySets>
+        <dependencySet>
+            <!-- Enable access to all projects in the current multimodule build! <useAllReactorProjects>true</useAllReactorProjects> -->
+            <!-- Now, select which projects to include in this module-set. -->
+            <outputDirectory>/dist/${spark.version}/lib</outputDirectory>
+            <useProjectArtifact>true</useProjectArtifact>
+            <useTransitiveDependencies>true</useTransitiveDependencies>
+            <unpack>false</unpack>
+            <useStrictFiltering>false</useStrictFiltering>
+            <useTransitiveFiltering>true</useTransitiveFiltering>
+
+        </dependencySet>
+    </dependencySets>
+
+</assembly>
+
diff --git a/linkis-engineconn-plugins/spark/scala-2.12/src/test/resources/etltest.dolphin b/linkis-engineconn-plugins/spark/scala-2.12/src/test/resources/etltest.dolphin
new file mode 100644
index 000000000..7b065bc06
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/scala-2.12/src/test/resources/etltest.dolphin
@@ -0,0 +1,4 @@
+name;age
+Michael;29
+Andy;30
+Justin;19
\ No newline at end of file
diff --git a/linkis-engineconn-plugins/spark/scala-2.12/src/test/scala/org/apache/linkis/engineplugin/spark/datacalc/TestDeltaCalc.scala b/linkis-engineconn-plugins/spark/scala-2.12/src/test/scala/org/apache/linkis/engineplugin/spark/datacalc/TestDeltaCalc.scala
new file mode 100644
index 000000000..344edf45b
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/scala-2.12/src/test/scala/org/apache/linkis/engineplugin/spark/datacalc/TestDeltaCalc.scala
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.linkis.engineplugin.spark.datacalc
+
+import org.apache.linkis.common.io.FsPath
+import org.apache.linkis.engineplugin.spark.datacalc.model.DataCalcGroupData
+
+import org.junit.jupiter.api.{Assertions, Test}
+
+class TestDelta {
+
+  val filePath = this.getClass.getResource("/").getFile
+
+  @Test
+  def testDataLakeWrite: Unit = {
+    // skip os: windows
+    if (!FsPath.WINDOWS) {
+      val data = DataCalcGroupData.getData(writeConfigJson.replace("{filePath}", filePath))
+      Assertions.assertTrue(data != null)
+
+      val (sources, transforms, sinks) = DataCalcExecution.getPlugins(data)
+      Assertions.assertTrue(sources != null)
+      Assertions.assertTrue(transforms != null)
+      Assertions.assertTrue(sinks != null)
+    }
+  }
+
+  @Test
+  def testDataLakeReader: Unit = {
+    // skip os: windows
+    if (!FsPath.WINDOWS) {
+      val data = DataCalcGroupData.getData(readerConfigJson.replace("{filePath}", filePath))
+      Assertions.assertTrue(data != null)
+
+      val (sources, transforms, sinks) = DataCalcExecution.getPlugins(data)
+      Assertions.assertTrue(sources != null)
+      Assertions.assertTrue(transforms != null)
+      Assertions.assertTrue(sinks != null)
+    }
+  }
+
+  val writeConfigJson =
+    """
+      |{
+      |    "sources": [
+      |        {
+      |            "name": "file",
+      |            "type": "source",
+      |            "config": {
+      |                "resultTable": "T1654611700631",
+      |                "path": "file://{filePath}/etltest.dolphin",
+      |                "serializer": "csv",
+      |                "options": {
+      |                "header":"true",
+      |                "delimiter":";"
+      |                },
+      |                "columnNames": ["name", "age"]
+      |            }
+      |        }
+      |    ],
+      |    "transformations": [
+      |        {
+      |            "name": "sql",
+      |            "type": "transformation",
+      |            "config": {
+      |                "resultTable": "T111",
+      |                "sql": "select * from T1654611700631"
+      |            }
+      |        }
+      |    ],
+      |    "sinks": [
+      |        {
+      |            "name": "datalake",
+      |            "config": {
+      |                "sourceTable": "T1654611700631",
+      |                "tableFormat": "delta",
+      |                "path": "file://{filePath}/delta",
+      |                "saveMode": "overwrite"
+      |            }
+      |        }
+      |    ]
+      |}
+      |""".stripMargin
+
+  val readerConfigJson =
+    """
+      |{
+      |    "sources": [
+      |        {
+      |            "name": "datalake",
+      |            "type": "source",
+      |            "config": {
+      |                "resultTable": "T1654611700631",
+      |                "tableFormat": "delta",
+      |                "path": "file://{filePath}/delta",
+      |            }
+      |        }
+      |    ],
+      |    "transformations": [
+      |        {
+      |            "name": "sql",
+      |            "type": "transformation",
+      |            "config": {
+      |                "resultTable": "T111",
+      |                "sql": "select * from T1654611700631"
+      |            }
+      |        }
+      |    ],
+      |    "sinks": [
+      |        {
+      |            "name": "file",
+      |            "config": {
+      |                "sourceTable": "T1654611700631",
+      |                "path": "file://{filePath}/csv",
+      |                "saveMode": "overwrite",
+      |                "options": {
+      |                "header":"true"
+      |                },
+      |                "serializer": "csv"
+      |            }
+      |        }
+      |    ]
+      |}
+      |""".stripMargin
+
+}
diff --git a/linkis-engineconn-plugins/spark/scala-2.12/src/test/scala/org/apache/linkis/engineplugin/spark/datacalc/TestHudiCalc.scala b/linkis-engineconn-plugins/spark/scala-2.12/src/test/scala/org/apache/linkis/engineplugin/spark/datacalc/TestHudiCalc.scala
new file mode 100644
index 000000000..344ae173a
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/scala-2.12/src/test/scala/org/apache/linkis/engineplugin/spark/datacalc/TestHudiCalc.scala
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.linkis.engineplugin.spark.datacalc
+
+import org.apache.linkis.common.io.FsPath
+import org.apache.linkis.engineplugin.spark.datacalc.model.DataCalcGroupData
+
+import org.junit.jupiter.api.{Assertions, Test}
+
+class TestHudi {
+
+  val filePath = this.getClass.getResource("/").getFile
+
+  @Test
+  def testDataLakeWrite: Unit = {
+    // skip os: windows
+    if (!FsPath.WINDOWS) {
+      val data = DataCalcGroupData.getData(writeConfigJson.replace("{filePath}", filePath))
+      Assertions.assertTrue(data != null)
+
+      val (sources, transforms, sinks) = DataCalcExecution.getPlugins(data)
+      Assertions.assertTrue(sources != null)
+      Assertions.assertTrue(transforms != null)
+      Assertions.assertTrue(sinks != null)
+    }
+  }
+
+  @Test
+  def testDataLakeReader: Unit = {
+    // skip os: windows
+    if (!FsPath.WINDOWS) {
+      val data = DataCalcGroupData.getData(readerConfigJson.replace("{filePath}", filePath))
+      Assertions.assertTrue(data != null)
+
+      val (sources, transforms, sinks) = DataCalcExecution.getPlugins(data)
+      Assertions.assertTrue(sources != null)
+      Assertions.assertTrue(transforms != null)
+      Assertions.assertTrue(sinks != null)
+    }
+  }
+
+  val writeConfigJson =
+    """
+      |{
+      |    "sources": [
+      |        {
+      |            "name": "file",
+      |            "type": "source",
+      |            "config": {
+      |                "resultTable": "T1654611700631",
+      |                "path": "file://{filePath}/etltest.dolphin",
+      |                "serializer": "csv",
+      |                "options": {
+      |                "header":"true",
+      |                "delimiter":";"
+      |                },
+      |                "columnNames": ["name", "age"]
+      |            }
+      |        }
+      |    ],
+      |    "transformations": [
+      |        {
+      |            "name": "sql",
+      |            "type": "transformation",
+      |            "config": {
+      |                "resultTable": "T111",
+      |                "sql": "select * from T1654611700631"
+      |            }
+      |        }
+      |    ],
+      |    "sinks": [
+      |        {
+      |            "name": "datalake",
+      |            "config": {
+      |                "sourceTable": "T1654611700631",
+      |                "tableFormat": "hudi",
+      |                "options": {
+      |                "hoodie.table.name":"huditest",
+      |                "hoodie.datasource.write.recordkey.field":"age",
+      |                "hoodie.datasource.write.precombine.field":"age"
+      |                },
+      |                "path": "file://{filePath}/hudi",
+      |                "saveMode": "append"
+      |            }
+      |        }
+      |    ]
+      |}
+      |""".stripMargin
+
+  val readerConfigJson =
+    """
+      |{
+      |    "sources": [
+      |        {
+      |            "name": "datalake",
+      |            "type": "source",
+      |            "config": {
+      |                "resultTable": "T1654611700631",
+      |                "tableFormat": "hudi",
+      |                "path": "file://{filePath}/hudi",
+      |            }
+      |        }
+      |    ],
+      |    "transformations": [
+      |        {
+      |            "name": "sql",
+      |            "type": "transformation",
+      |            "config": {
+      |                "resultTable": "T111",
+      |                "sql": "select * from T1654611700631"
+      |            }
+      |        }
+      |    ],
+      |    "sinks": [
+      |        {
+      |            "name": "file",
+      |            "config": {
+      |                "sourceTable": "T1654611700631",
+      |                "path": "file://{filePath}/csv",
+      |                "saveMode": "overwrite",
+      |                "options": {
+      |                "header":"true"
+      |                },
+      |                "serializer": "csv"
+      |            }
+      |        }
+      |    ]
+      |}
+      |""".stripMargin
+
+}
diff --git a/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/sink/DataLakeSinkConfig.java b/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/sink/DataLakeSinkConfig.java
new file mode 100644
index 000000000..03f808849
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/sink/DataLakeSinkConfig.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.linkis.engineplugin.spark.datacalc.sink;
+
+import org.apache.linkis.engineplugin.spark.datacalc.model.SinkConfig;
+
+import javax.validation.constraints.NotBlank;
+import javax.validation.constraints.Pattern;
+
+public class DataLakeSinkConfig extends SinkConfig {
+
+  @NotBlank
+  @Pattern(
+      regexp = "^(((file|hdfs)://)|/).*",
+      message =
+          "Invalid path URI, please set the following allowed schemas: 
'file://' or 'hdfs://'(default).")
+  private String path;
+
+  @NotBlank
+  @Pattern(
+      regexp = "^(delta|hudi)$",
+      message = "Unknown table format: {saveMode}. Accepted save modes are 
'delta', 'hudi'.")
+  private String tableFormat = "delta";
+
+  @NotBlank
+  @Pattern(
+      regexp = "^(overwrite|append|ignore|error|errorifexists)$",
+      message =
+          "Unknown save mode: {saveMode}. Accepted save modes are 'overwrite', 
'append', 'ignore', 'error', 'errorifexists'.")
+  private String saveMode = "overwrite";
+
+  public String getPath() {
+    if (path.startsWith("/")) return "hdfs://" + path;
+    return path;
+  }
+
+  public void setPath(String path) {
+    this.path = path;
+  }
+
+  public String getSaveMode() {
+    return saveMode;
+  }
+
+  public void setSaveMode(String saveMode) {
+    this.saveMode = saveMode;
+  }
+
+  public String getTableFormat() {
+    return tableFormat;
+  }
+
+  public void setTableFormat(String tableFormat) {
+    this.tableFormat = tableFormat;
+  }
+}
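
Note the normalization in getPath above: a bare absolute path is treated as HDFS, while explicit file:// and hdfs:// schemes pass through unchanged. A small sketch of the behavior:

    import org.apache.linkis.engineplugin.spark.datacalc.sink.DataLakeSinkConfig

    val cfg = new DataLakeSinkConfig()
    cfg.setPath("/warehouse/delta/orders")
    cfg.getPath  // "hdfs:///warehouse/delta/orders" -- bare paths default to HDFS
    cfg.setPath("file:///tmp/delta/orders")
    cfg.getPath  // "file:///tmp/delta/orders" -- explicit scheme wins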
diff --git a/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/source/DataLakeSourceConfig.java b/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/source/DataLakeSourceConfig.java
new file mode 100644
index 000000000..b8fed0a7a
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/source/DataLakeSourceConfig.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.linkis.engineplugin.spark.datacalc.source;
+
+import org.apache.linkis.engineplugin.spark.datacalc.model.SourceConfig;
+
+import javax.validation.constraints.NotBlank;
+import javax.validation.constraints.Pattern;
+
+public class DataLakeSourceConfig extends SourceConfig {
+
+  @NotBlank
+  @Pattern(
+      regexp = "^(((file|hdfs)://)|/).*",
+      message =
+          "Invalid path URI, please set the following allowed schemas: 
'file://' or 'hdfs://'(default).")
+  private String path;
+
+  @NotBlank
+  @Pattern(
+      regexp = "^(delta|hudi)$",
+      message = "Unknown table format: {saveMode}. Accepted save modes are 
'delta', 'hudi'.")
+  private String tableFormat = "delta";
+
+  public String getPath() {
+    if (path.startsWith("/")) return "hdfs://" + path;
+    return path;
+  }
+
+  public void setPath(String path) {
+    this.path = path;
+  }
+
+  public String getTableFormat() {
+    return tableFormat;
+  }
+
+  public void setTableFormat(String tableFormat) {
+    this.tableFormat = tableFormat;
+  }
+}
diff --git a/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/util/PluginUtil.java b/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/util/PluginUtil.java
index 9af366430..471d44ab5 100644
--- a/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/util/PluginUtil.java
+++ b/linkis-engineconn-plugins/spark/src/main/java/org/apache/linkis/engineplugin/spark/datacalc/util/PluginUtil.java
@@ -45,6 +45,7 @@ public class PluginUtil {
     classMap.put("jdbc", JdbcSource.class);
     classMap.put("file", FileSource.class);
     classMap.put("redis", RedisSource.class);
+    classMap.put("datalake", DataLakeSource.class);
     return classMap;
   }
 
@@ -61,6 +62,7 @@ public class PluginUtil {
     classMap.put("hive", HiveSink.class);
     classMap.put("file", FileSink.class);
     classMap.put("redis", RedisSink.class);
+    classMap.put("datalake", DataLakeSink.class);
     return classMap;
   }
 
diff --git a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/config/SparkConfiguration.scala b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/config/SparkConfiguration.scala
index 0a3966701..dc851c6fb 100644
--- a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/config/SparkConfiguration.scala
+++ b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/config/SparkConfiguration.scala
@@ -145,6 +145,8 @@ object SparkConfiguration extends Logging {
 
   val SPARK_ONCE_YARN_RESTFUL_URL = CommonVars[String]("linkis.spark.once.yarn.restful.url", "")
 
+  val LINKIS_SPARK_ETL_SUPPORT_HUDI = CommonVars[Boolean]("linkis.spark.etl.support.hudi", false)
+
   private def getMainJarName(): String = {
     val somePath = ClassUtils.jarOfClass(classOf[SparkEngineConnFactory])
     if (somePath.isDefined) {
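
The new flag is a plain CommonVars entry, so (assuming the usual CommonVars resolution order in this repository) it can be enabled through engine configuration without a rebuild. Consumers read it exactly as the factory change at the end of this patch does:

    import org.apache.linkis.engineplugin.spark.config.SparkConfiguration

    if (SparkConfiguration.LINKIS_SPARK_ETL_SUPPORT_HUDI.getValue) {
      // Hudi requires Kryo serialization; see SparkEngineConnFactory below.
    }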
diff --git a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/sink/DataLakeSink.scala b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/sink/DataLakeSink.scala
new file mode 100644
index 000000000..a2deb99a6
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/sink/DataLakeSink.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.linkis.engineplugin.spark.datacalc.sink
+
+import org.apache.linkis.common.utils.Logging
+import org.apache.linkis.engineplugin.spark.datacalc.api.DataCalcSink
+
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+
+class DataLakeSink extends DataCalcSink[DataLakeSinkConfig] with Logging {
+
+  def output(spark: SparkSession, ds: Dataset[Row]): Unit = {
+    logger.info(s"Save data to ${config.getTableFormat} tablePath: 
${config.getPath}")
+    val writer = 
ds.write.format(config.getTableFormat).mode(config.getSaveMode)
+
+    if (config.getOptions != null && !config.getOptions.isEmpty) {
+      writer.options(config.getOptions)
+    }
+    writer.save(config.getPath)
+  }
+
+}
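
In plain Spark terms, output above reduces to a format/mode/options/save chain. For a Hudi write with the options used in TestHudiCalc, the equivalent direct call would be (a sketch; ds is any Dataset[Row], and the path is illustrative):

    ds.write
      .format("hudi")
      .mode("append")
      .option("hoodie.table.name", "huditest")
      .option("hoodie.datasource.write.recordkey.field", "age")
      .option("hoodie.datasource.write.precombine.field", "age")
      .save("file:///tmp/hudi")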
diff --git a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/source/DataLakeSource.scala b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/source/DataLakeSource.scala
new file mode 100644
index 000000000..d1a07211c
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/source/DataLakeSource.scala
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.linkis.engineplugin.spark.datacalc.source
+
+import org.apache.linkis.common.utils.Logging
+import org.apache.linkis.engineplugin.spark.datacalc.api.DataCalcSource
+
+import org.apache.commons.text.StringSubstitutor
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+
+class DataLakeSource extends DataCalcSource[DataLakeSourceConfig] with Logging {
+
+  override def getData(spark: SparkSession): Dataset[Row] = {
+    val reader = spark.read
+
+    if (config.getOptions != null && !config.getOptions.isEmpty) {
+      reader.options(config.getOptions)
+    }
+    val substitutor = new StringSubstitutor(config.getVariables)
+    val path = substitutor.replace(config.getPath)
+    logger.info(s"Load data to ${config.getTableFormat} tablePath: 
${config.getPath}")
+
+    reader.format(config.getTableFormat).load(path)
+  }
+
+}
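
Unlike the sink, getData also runs the configured path through commons-text's StringSubstitutor, so ${...} placeholders are resolved from the job's variables before loading. A sketch (the "ds" variable name is illustrative, and spark is assumed to be an active SparkSession):

    import org.apache.commons.text.StringSubstitutor
    import scala.collection.JavaConverters._

    val vars = Map("ds" -> "2023-05-19").asJava
    val path = new StringSubstitutor(vars).replace("hdfs:///warehouse/hudi/orders/${ds}")
    // path == "hdfs:///warehouse/hudi/orders/2023-05-19"
    val df = spark.read.format("hudi").load(path)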
diff --git a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/factory/SparkEngineConnFactory.scala b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/factory/SparkEngineConnFactory.scala
index ec3b697ea..fa77e6ed8 100644
--- a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/factory/SparkEngineConnFactory.scala
+++ b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/factory/SparkEngineConnFactory.scala
@@ -208,6 +208,10 @@ class SparkEngineConnFactory extends MultiExecutorEngineConnFactory with Logging
     if (System.getenv("SPARK_HOME") != null) conf.setSparkHome(System.getenv("SPARK_HOME"))
     conf.set("spark.scheduler.mode", "FAIR")
 
+    if (SparkConfiguration.LINKIS_SPARK_ETL_SUPPORT_HUDI.getValue) {
+      conf.set("spark.serializer", 
"org.apache.spark.serializer.KryoSerializer")
+    }
+
     val builder = SparkSession.builder.config(conf)
     builder.enableHiveSupport().getOrCreate()
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
