AngersZhuuuu commented on a change in pull request #29085:
URL: https://github.com/apache/spark/pull/29085#discussion_r458691259



##########
File path: sql/core/src/test/resources/sql-tests/results/transform.sql.out
##########
@@ -0,0 +1,160 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 12
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES
+('1', true, unhex('537061726B2053514C'), tinyint(1), 1, smallint(100), bigint(1), float(1.0), 1.0, Decimal(1.0), timestamp('1997-01-02'), date('2000-04-01')),
+('2', false, unhex('537061726B2053514C'), tinyint(2), 2,  smallint(200), bigint(2), float(2.0), 2.0, Decimal(2.0), timestamp('1997-01-02 03:04:05'), date('2000-04-02')),
+('3', true, unhex('537061726B2053514C'), tinyint(3), 3, smallint(300), bigint(3), float(3.0), 3.0, Decimal(3.0), timestamp('1997-02-10 17:32:01-08'), date('2000-04-03'))
+as t1(a, b, c, d, e, f, g, h, i, j, k, l)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT TRANSFORM(a)
+USING 'cat' AS (a)
+FROM t1
+-- !query schema
+struct<a:string>
+-- !query output
+1
+2
+3
+
+
+-- !query
+SELECT TRANSFORM(a)
+USING 'some_non_existent_command' AS (a)
+FROM t1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+Subprocess exited with status 127. Error: /bin/bash: some_non_existent_command: command not found
+
+
+-- !query
+SELECT TRANSFORM(a)
+USING 'python some_non_existent_file' AS (a)
+FROM t1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkException
+Subprocess exited with status 2. Error: python: can't open file 'some_non_existent_file': [Errno 2] No such file or directory
+
+
+-- !query
+SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM (
+    SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l)
+    USING 'cat' AS (
+        a string,
+        b boolean,
+        c binary,
+        d tinyint,
+        e int,
+        f smallint,
+        g long,
+        h float,
+        i double,
+        j decimal(38, 18),
+        k timestamp,
+        l date)
+    FROM t1
+) tmp
+-- !query schema
+struct<a:string,b:boolean,decode(c, UTF-8):string,d:tinyint,e:int,f:smallint,g:bigint,h:float,i:double,j:decimal(38,18),k:timestamp,l:date>
+-- !query output
+1      true    Spark SQL       1       1       100     1       1.0     1.0     1.000000000000000000    1997-01-02 00:00:00     2000-04-01
+2      false   Spark SQL       2       2       200     2       2.0     2.0     2.000000000000000000    1997-01-02 03:04:05     2000-04-02
+3      true    Spark SQL       3       3       300     3       3.0     3.0     3.000000000000000000    1997-02-10 17:32:01     2000-04-03
+
+
+-- !query
+SELECT TRANSFORM(a)
+USING 'cat'
+FROM t1
+-- !query schema
+struct<>
+-- !query output
+java.lang.ArrayIndexOutOfBoundsException

Review comment:
       > Which code throws this exception?
   
   ```
   Caused by: java.lang.ArrayIndexOutOfBoundsException: 1
        at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.genericGet(rows.scala:201)
        at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow.getAs(rows.scala:35)
        at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow.isNullAt(rows.scala:36)
        at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow.isNullAt$(rows.scala:36)
        at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.isNullAt(rows.scala:195)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(generated.java:40)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(generated.java:22)
        at scala.collection.Iterator$$anon$10.next(Iterator.scala:459)
        at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:341)
        at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
        at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
        at org.apache.spark.scheduler.Task.run(Task.scala:127)
        at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:464)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1403)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467)
        ... 3 more
   ```
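
    For context on where the exception comes from: when the `USING 'cat'` query carries no `AS` clause, the transform output is projected with a default two-column (`key`, `value`) schema, while the row built from the script's single-field output line holds only one value, so the generated `UnsafeProjection` indexes past the end of the backing `GenericInternalRow`. A minimal sketch that reproduces the same top frames, calling Spark's internal row class directly (the default-schema reading is my interpretation of the Hive-compatible behavior, not something this PR states):

    ```scala
    import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
    import org.apache.spark.unsafe.types.UTF8String

    // Row built from the script output line "1": a single field.
    val row = new GenericInternalRow(Array[Any](UTF8String.fromString("1")))

    row.isNullAt(0) // false -- field 0 exists
    row.isNullAt(1) // throws java.lang.ArrayIndexOutOfBoundsException: 1, via the
                    // same genericGet(rows.scala:201) frame as in the trace above
    ```

    On JDK 8 the exception message is just the offending index (`1`), which matches the bare `java.lang.ArrayIndexOutOfBoundsException` line in the generated output above.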



