This is an automated email from the ASF dual-hosted git repository.

peacewong pushed a commit to branch dev-1.4.0
in repository https://gitbox.apache.org/repos/asf/linkis.git


The following commit(s) were added to refs/heads/dev-1.4.0 by this push:
     new 4767c5e4e spark etl support excel and  transformations can be null 
(#4539)
4767c5e4e is described below

commit 4767c5e4e8bf067182b444abb932ebef85fabe60
Author: ChengJie1053 <[email protected]>
AuthorDate: Thu May 18 14:13:31 2023 +0800

    spark etl support excel and  transformations can be null (#4539)
---
 docs/configuration/spark.md                        |   5 +
 .../spark/datacalc/DataCalcExecution.scala         |  10 +-
 .../spark/datacalc/sink/FileSink.scala             |   1 +
 .../spark/datacalc/source/FileSource.scala         |   1 +
 .../spark/src/test/resources/etltest.dolphin       |   4 +
 .../spark/datacalc/TestExcelCala.scala             | 135 +++++++++++++++++++++
 6 files changed, 153 insertions(+), 3 deletions(-)

diff --git a/docs/configuration/spark.md b/docs/configuration/spark.md
index 6c8abfcb6..99c07a12b 100644
--- a/docs/configuration/spark.md
+++ b/docs/configuration/spark.md
@@ -25,3 +25,8 @@
 |spark|wds.linkis.spark.engine.is.viewfs.env| true | 
spark.engine.is.viewfs.env|
 |spark|wds.linkis.spark.engineconn.fatal.log|error writing 
class;OutOfMemoryError|spark.engineconn.fatal.log|
 |spark|wds.linkis.spark.engine.scala.replace_package_header.enable| true 
|spark.engine.scala.replace_package_header.enable|
+
+
+The spark-excel package may cause class conflicts, so it must be downloaded 
separately and placed in the Spark lib directory:
+wget 
https://repo1.maven.org/maven2/com/crealytics/spark-excel-2.12.17-3.2.2_2.12/3.2.2_0.18.1/spark-excel-2.12.17-3.2.2_2.12-3.2.2_0.18.1.jar
+cp spark-excel-2.12.17-3.2.2_2.12-3.2.2_0.18.1.jar 
{LINKIS_HOME}/lib/linkis-engineconn-plugins/spark/dist/3.2.1/lib
\ No newline at end of file
diff --git 
a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/DataCalcExecution.scala
 
b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/DataCalcExecution.scala
index 447005f5c..e1b17342e 100644
--- 
a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/DataCalcExecution.scala
+++ 
b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/DataCalcExecution.scala
@@ -52,9 +52,13 @@ object DataCalcExecution {
     val sources = mapleData.getSources.map(source =>
       PluginUtil.createSource[SR](source.getName, source.getConfig)
     )
-    val transformations = mapleData.getTransformations.map(sink =>
-      PluginUtil.createTransform[TR](sink.getName, sink.getConfig)
-    )
+    val transformations = if (mapleData.getTransformations == null) {
+      Array.empty[DataCalcTransform[TR]]
+    } else {
+      mapleData.getTransformations.map(sink =>
+        PluginUtil.createTransform[TR](sink.getName, sink.getConfig)
+      )
+    }
     val sinks =
       mapleData.getSinks.map(sink => PluginUtil.createSink[SK](sink.getName, 
sink.getConfig))
 
diff --git 
a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/sink/FileSink.scala
 
b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/sink/FileSink.scala
index 1464375e3..3519a9f7f 100644
--- 
a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/sink/FileSink.scala
+++ 
b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/sink/FileSink.scala
@@ -48,6 +48,7 @@ class FileSink extends DataCalcSink[FileSinkConfig] with 
Logging {
       case "parquet" => writer.parquet(path)
       case "text" => writer.text(path)
       case "orc" => writer.orc(path)
+      case "excel" => writer.format("excel").save(path)
       case _ => writer.format(config.getSerializer).save(path)
     }
   }
diff --git 
a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/source/FileSource.scala
 
b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/source/FileSource.scala
index 8b579a0b6..ae53d6b02 100644
--- 
a/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/source/FileSource.scala
+++ 
b/linkis-engineconn-plugins/spark/src/main/scala/org/apache/linkis/engineplugin/spark/datacalc/source/FileSource.scala
@@ -41,6 +41,7 @@ class FileSource extends DataCalcSource[FileSourceConfig] 
with Logging {
       case "parquet" => reader.parquet(path)
       case "text" => reader.text(path)
       case "orc" => reader.orc(path)
+      case "excel" => reader.format("excel").load(path)
       case _ => reader.format(config.getSerializer).load(path)
     }
     if (config.getColumnNames != null && config.getColumnNames.length > 0) {
diff --git a/linkis-engineconn-plugins/spark/src/test/resources/etltest.dolphin 
b/linkis-engineconn-plugins/spark/src/test/resources/etltest.dolphin
new file mode 100644
index 000000000..7b065bc06
--- /dev/null
+++ b/linkis-engineconn-plugins/spark/src/test/resources/etltest.dolphin
@@ -0,0 +1,4 @@
+name;age
+Michael;29
+Andy;30
+Justin;19
\ No newline at end of file
diff --git 
a/linkis-engineconn-plugins/spark/src/test/scala/org/apache/linkis/engineplugin/spark/datacalc/TestExcelCala.scala
 
b/linkis-engineconn-plugins/spark/src/test/scala/org/apache/linkis/engineplugin/spark/datacalc/TestExcelCala.scala
new file mode 100644
index 000000000..7e2b26648
--- /dev/null
+++ 
b/linkis-engineconn-plugins/spark/src/test/scala/org/apache/linkis/engineplugin/spark/datacalc/TestExcelCala.scala
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.linkis.engineplugin.spark.datacalc
+
+import org.apache.linkis.common.io.FsPath
+import org.apache.linkis.engineplugin.spark.datacalc.model.DataCalcGroupData
+
+import org.junit.jupiter.api.{Assertions, Test};
+
+class TestExcelCala { // Checks that DataCalc plugins can be created from JSON configs that use the "excel" serializer.
+
+  val filePath = this.getClass.getResource("/").getFile // classpath root of the test resources; replaces {filePath} in the configs below
+
+  @Test
+  def testExcelWrite: Unit = { // config under test: csv file source -> sql transformation -> excel file sink
+    // Skipped on Windows: the body runs only when FsPath.WINDOWS is false.
+    if (!FsPath.WINDOWS) {
+      val data = 
DataCalcGroupData.getData(excelWriteConfigJson.replace("{filePath}", filePath))
+      Assertions.assertTrue(data != null)
+
+      val (sources, transforms, sinks) = DataCalcExecution.getPlugins(data) // this test only creates the plugins; it never executes them
+      Assertions.assertTrue(sources != null)
+      Assertions.assertTrue(transforms != null)
+      Assertions.assertTrue(sinks != null)
+    }
+  }
+
+  @Test
+  def testExcelReader: Unit = { // config under test: excel file source -> csv file sink, with no "transformations" section
+    // Skipped on Windows: the body runs only when FsPath.WINDOWS is false.
+    if (!FsPath.WINDOWS) {
+      val data = 
DataCalcGroupData.getData(excelReaderConfigJson.replace("{filePath}", filePath))
+      Assertions.assertTrue(data != null)
+
+      val (sources, transforms, sinks) = DataCalcExecution.getPlugins(data) // this test only creates the plugins; it never executes them
+      Assertions.assertTrue(sources != null)
+      Assertions.assertTrue(transforms != null) // must hold even though the config declares no transformations
+      Assertions.assertTrue(sinks != null)
+    }
+  }
+
+  val excelWriteConfigJson = // JSON template: reads etltest.dolphin as ';'-delimited csv and writes to {filePath}/excel with serializer "excel"
+    """
+      |{
+      |    "sources": [
+      |        {
+      |            "name": "file",
+      |            "type": "source",
+      |            "config": {
+      |                "resultTable": "T1654611700631",
+      |                "path": "file://{filePath}/etltest.dolphin",
+      |                "serializer": "csv",
+      |                "options": {
+      |                "header":"true",
+      |                "delimiter":";"
+      |                },
+      |                "columnNames": ["name", "age"]
+      |            }
+      |        }
+      |    ],
+      |    "transformations": [
+      |        {
+      |            "name": "sql",
+      |            "type": "transformation",
+      |            "config": {
+      |                "resultTable": "T123",
+      |                "sql": "select * from T1654611700631"
+      |            }
+      |        }
+      |    ],
+      |    "sinks": [
+      |        {
+      |            "name": "file",
+      |            "config": {
+      |                "sourceTable": "T1654611700631",
+      |                "path": "file://{filePath}/excel",
+      |                "saveMode": "overwrite",
+      |                "serializer": "excel"
+      |            }
+      |        }
+      |    ]
+      |}
+      |""".stripMargin
+
+  val excelReaderConfigJson = // JSON template: reads {filePath}/excel with serializer "excel" and writes csv; has no "transformations" key (presumably parsed as null - verify)
+    """
+      |{
+      |    "sources": [
+      |        {
+      |            "name": "file",
+      |            "type": "source",
+      |            "config": {
+      |                "resultTable": "T1654611700631",
+      |                "path": "file://{filePath}/excel",
+      |                "serializer": "excel",
+      |                "options": {
+      |                "header":"true"
+      |                },
+      |                "columnNames": ["name", "age"]
+      |            }
+      |        }
+      |    ],
+      |    "sinks": [
+      |        {
+      |            "name": "file",
+      |            "config": {
+      |                "sourceTable": "T1654611700631",
+      |                "path": "file://{filePath}/csv",
+      |                "saveMode": "overwrite",
+      |                "options": {
+      |                "header":"true"
+      |                },
+      |                "serializer": "csv"
+      |            }
+      |        }
+      |    ]
+      |}
+      |""".stripMargin
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to