This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 6fe52ad  [SPARK-31414][SQL] Fix performance regression with new 
TimestampFormatter for json and csv time parsing
6fe52ad is described below

commit 6fe52ad0a7eafe4293338a075ee25917127d4497
Author: Kent Yao <yaooq...@hotmail.com>
AuthorDate: Mon Apr 13 03:11:28 2020 +0000

    [SPARK-31414][SQL] Fix performance regression with new TimestampFormatter 
for json and csv time parsing
    
    With the original benchmark, where the timestamp values are valid for the new
    parser,
    
    the result is
    ```scala
    [info] Running benchmark: Read dates and timestamps
    [info]   Running case: timestamp strings
    [info]   Stopped after 3 iterations, 5781 ms
    [info]   Running case: parse timestamps from Dataset[String]
    [info]   Stopped after 3 iterations, 44764 ms
    [info]   Running case: infer timestamps from Dataset[String]
    [info]   Stopped after 3 iterations, 93764 ms
    [info]   Running case: from_json(timestamp)
    [info]   Stopped after 3 iterations, 59021 ms
    ```
    When we modify the benchmark to
    
    ```scala
         def timestampStr: Dataset[String] = {
            spark.range(0, rowsNum, 1, 1).mapPartitions { iter =>
              iter.map(i => s"""{"timestamp":"1970-01-01T01:02:03.${i % 
100}"}""")
            }.select($"value".as("timestamp")).as[String]
          }
    
          readBench.addCase("timestamp strings", numIters) { _ =>
            timestampStr.noop()
          }
    
          readBench.addCase("parse timestamps from Dataset[String]", numIters) 
{ _ =>
            spark.read.schema(tsSchema).json(timestampStr).noop()
          }
    
          readBench.addCase("infer timestamps from Dataset[String]", numIters) 
{ _ =>
            spark.read.json(timestampStr).noop()
          }
    ```
    where the timestamp values are invalid for the new parser, which causes a
    fallback to the legacy (2.4) parser,
    the result is
    
    ```scala
    [info] Running benchmark: Read dates and timestamps
    [info]   Running case: timestamp strings
    [info]   Stopped after 3 iterations, 5623 ms
    [info]   Running case: parse timestamps from Dataset[String]
    [info]   Stopped after 3 iterations, 506637 ms
    [info]   Running case: infer timestamps from Dataset[String]
    [info]   Stopped after 3 iterations, 509076 ms
    ```
    This is about a 10x performance regression.
    
    However, if we change the timestamp pattern to `....HH:mm:ss[.SSS][XXX]`,
    which makes all of these timestamp values valid for the new parser and so
    avoids the fallback, the result is
    
    ```scala
    [info] Running benchmark: Read dates and timestamps
    [info]   Running case: timestamp strings
    [info]   Stopped after 3 iterations, 5623 ms
    [info]   Running case: parse timestamps from Dataset[String]
    [info]   Stopped after 3 iterations, 506637 ms
    [info]   Running case: infer timestamps from Dataset[String]
    [info]   Stopped after 3 iterations, 509076 ms
    ```
    
     Fix performance regression.
    
    NO
    
    new tests added.
    
    Closes #28181 from yaooqinn/SPARK-31414.
    
    Authored-by: Kent Yao <yaooq...@hotmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit d65f534c5ad4385b7c5198f15cb014e1d24e47c9)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../apache/spark/sql/catalyst/csv/CSVOptions.scala |   9 +-
 .../spark/sql/catalyst/json/JSONOptions.scala      |   9 +-
 sql/core/benchmarks/CSVBenchmark-jdk11-results.txt |  88 +++++++-------
 sql/core/benchmarks/CSVBenchmark-results.txt       |  88 +++++++-------
 .../benchmarks/JsonBenchmark-jdk11-results.txt     | 130 ++++++++++-----------
 sql/core/benchmarks/JsonBenchmark-results.txt      | 130 ++++++++++-----------
 .../org/apache/spark/sql/CsvFunctionsSuite.scala   |  12 ++
 .../org/apache/spark/sql/JsonFunctionsSuite.scala  |  12 ++
 .../execution/datasources/csv/CSVBenchmark.scala   |   4 +-
 .../execution/datasources/json/JsonBenchmark.scala |   4 +-
 10 files changed, 262 insertions(+), 224 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index 8892037..9d09cab 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -26,6 +26,7 @@ import com.univocity.parsers.csv.{CsvParserSettings, 
CsvWriterSettings, Unescape
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
 
 class CSVOptions(
     @transient val parameters: CaseInsensitiveMap[String],
@@ -148,8 +149,12 @@ class CSVOptions(
 
   val dateFormat: String = parameters.getOrElse("dateFormat", 
DateFormatter.defaultPattern)
 
-  val timestampFormat: String =
-    parameters.getOrElse("timestampFormat", 
s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX")
+  val timestampFormat: String = parameters.getOrElse("timestampFormat",
+    if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
+      s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX"
+    } else {
+      s"${DateFormatter.defaultPattern}'T'HH:mm:ss[.SSS][XXX]"
+    })
 
   val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false)
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 45c4edf..f9222f5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -27,6 +27,7 @@ import com.fasterxml.jackson.core.json.JsonReadFeature
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
 
 /**
  * Options for parsing JSON data into Spark SQL rows.
@@ -90,8 +91,12 @@ private[sql] class JSONOptions(
 
   val dateFormat: String = parameters.getOrElse("dateFormat", 
DateFormatter.defaultPattern)
 
-  val timestampFormat: String =
-    parameters.getOrElse("timestampFormat", 
s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX")
+  val timestampFormat: String = parameters.getOrElse("timestampFormat",
+    if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
+      s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX"
+    } else {
+      s"${DateFormatter.defaultPattern}'T'HH:mm:ss[.SSS][XXX]"
+    })
 
   val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false)
 
diff --git a/sql/core/benchmarks/CSVBenchmark-jdk11-results.txt 
b/sql/core/benchmarks/CSVBenchmark-jdk11-results.txt
index d8071e7..147a77f 100644
--- a/sql/core/benchmarks/CSVBenchmark-jdk11-results.txt
+++ b/sql/core/benchmarks/CSVBenchmark-jdk11-results.txt
@@ -2,66 +2,66 @@
 Benchmark to measure CSV read/write performance
 
================================================================================================
 
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Parsing quoted values:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-One quoted string                                 44297          44515         
373          0.0      885948.7       1.0X
+One quoted string                                 24907          29374         
NaN          0.0      498130.5       1.0X
 
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Wide rows with 1000 columns:              Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 1000 columns                              196720         197783        
1560          0.0      196719.8       1.0X
-Select 100 columns                                46691          46861         
219          0.0       46691.4       4.2X
-Select one column                                 36811          36922         
111          0.0       36811.3       5.3X
-count()                                            8520           8610         
106          0.1        8520.5      23.1X
-Select 100 columns, one bad input field           67914          67994         
136          0.0       67914.0       2.9X
-Select 100 columns, corrupt record field          77272          77445         
214          0.0       77272.0       2.5X
+Select 1000 columns                               62811          63690        
1416          0.0       62811.4       1.0X
+Select 100 columns                                23839          24064         
230          0.0       23839.5       2.6X
+Select one column                                 19936          20641         
827          0.1       19936.4       3.2X
+count()                                            4174           4380         
206          0.2        4174.4      15.0X
+Select 100 columns, one bad input field           41015          42380        
1688          0.0       41015.4       1.5X
+Select 100 columns, corrupt record field          46281          46338         
 93          0.0       46280.5       1.4X
 
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Count a dataset with 10 columns:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns + count()                       25965          26054         
103          0.4        2596.5       1.0X
-Select 1 column + count()                         18591          18666         
 91          0.5        1859.1       1.4X
-count()                                            6102           6119         
 18          1.6         610.2       4.3X
+Select 10 columns + count()                       10810          10997         
163          0.9        1081.0       1.0X
+Select 1 column + count()                          7608           7641         
 47          1.3         760.8       1.4X
+count()                                            2415           2462         
 77          4.1         241.5       4.5X
 
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     2142           2161         
 17          4.7         214.2       1.0X
-to_csv(timestamp)                                 14744          14950         
182          0.7        1474.4       0.1X
-write timestamps to files                         12078          12202         
175          0.8        1207.8       0.2X
-Create a dataset of dates                          2275           2291         
 18          4.4         227.5       0.9X
-to_csv(date)                                      11407          11464         
 51          0.9        1140.7       0.2X
-write dates to files                               7638           7702         
 90          1.3         763.8       0.3X
+Create a dataset of timestamps                      874            914         
 37         11.4          87.4       1.0X
+to_csv(timestamp)                                  7051           7223         
250          1.4         705.1       0.1X
+write timestamps to files                          6712           6741         
 31          1.5         671.2       0.1X
+Create a dataset of dates                           909            945         
 35         11.0          90.9       1.0X
+to_csv(date)                                       4222           4231         
  8          2.4         422.2       0.2X
+write dates to files                               3799           3813         
 14          2.6         379.9       0.2X
 
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     2578           2590         
 10          3.9         257.8       1.0X
-read timestamps from files                        60103          60694         
512          0.2        6010.3       0.0X
-infer timestamps from files                      107871         108268         
351          0.1       10787.1       0.0X
-read date text from files                          2306           2310         
  4          4.3         230.6       1.1X
-read date from files                              47415          47657         
367          0.2        4741.5       0.1X
-infer date from files                             35261          35447         
164          0.3        3526.1       0.1X
-timestamp strings                                  3045           3056         
 11          3.3         304.5       0.8X
-parse timestamps from Dataset[String]             62221          63173         
849          0.2        6222.1       0.0X
-infer timestamps from Dataset[String]            118838         119629         
697          0.1       11883.8       0.0X
-date strings                                       3459           3481         
 19          2.9         345.9       0.7X
-parse dates from Dataset[String]                  51026          51447         
503          0.2        5102.6       0.1X
-from_csv(timestamp)                               60738          61818         
936          0.2        6073.8       0.0X
-from_csv(date)                                    46012          46278         
370          0.2        4601.2       0.1X
+read timestamp text from files                     1342           1364         
 35          7.5         134.2       1.0X
+read timestamps from files                        20300          20473         
247          0.5        2030.0       0.1X
+infer timestamps from files                       40705          40744         
 54          0.2        4070.5       0.0X
+read date text from files                          1146           1151         
  6          8.7         114.6       1.2X
+read date from files                              12278          12408         
117          0.8        1227.8       0.1X
+infer date from files                             12734          12872         
220          0.8        1273.4       0.1X
+timestamp strings                                  1467           1482         
 15          6.8         146.7       0.9X
+parse timestamps from Dataset[String]             21708          22234         
477          0.5        2170.8       0.1X
+infer timestamps from Dataset[String]             42357          43253         
922          0.2        4235.7       0.0X
+date strings                                       1512           1532         
 18          6.6         151.2       0.9X
+parse dates from Dataset[String]                  13436          13470         
 33          0.7        1343.6       0.1X
+from_csv(timestamp)                               20390          20486         
 95          0.5        2039.0       0.1X
+from_csv(date)                                    12592          12693         
139          0.8        1259.2       0.1X
 
-OpenJDK 64-Bit Server VM 11.0.5+10 on Mac OS X 10.15.2
-Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Filters pushdown:                         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-w/o filters                                       11889          11945         
 52          0.0      118893.1       1.0X
-pushdown disabled                                 11790          11860         
115          0.0      117902.3       1.0X
-w/ filters                                         1240           1278         
 33          0.1       12400.8       9.6X
+w/o filters                                       12535          12606         
 67          0.0      125348.8       1.0X
+pushdown disabled                                 12611          12672         
 91          0.0      126112.9       1.0X
+w/ filters                                         1093           1099         
 11          0.1       10928.3      11.5X
 
 
diff --git a/sql/core/benchmarks/CSVBenchmark-results.txt 
b/sql/core/benchmarks/CSVBenchmark-results.txt
index b3ba69c..498ca4c 100644
--- a/sql/core/benchmarks/CSVBenchmark-results.txt
+++ b/sql/core/benchmarks/CSVBenchmark-results.txt
@@ -2,66 +2,66 @@
 Benchmark to measure CSV read/write performance
 
================================================================================================
 
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Parsing quoted values:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-One quoted string                                 51602          51659         
 59          0.0     1032039.4       1.0X
+One quoted string                                 24073          24109         
 33          0.0      481463.5       1.0X
 
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Wide rows with 1000 columns:              Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 1000 columns                              191926         192879        
1615          0.0      191925.6       1.0X
-Select 100 columns                                46766          46846         
 69          0.0       46766.1       4.1X
-Select one column                                 35877          35930         
 83          0.0       35876.8       5.3X
-count()                                           11186          11262         
 65          0.1       11186.0      17.2X
-Select 100 columns, one bad input field           59943          60107         
232          0.0       59943.0       3.2X
-Select 100 columns, corrupt record field          73062          73406         
479          0.0       73062.2       2.6X
+Select 1000 columns                               58415          59611        
2071          0.0       58414.8       1.0X
+Select 100 columns                                22568          23020         
594          0.0       22568.0       2.6X
+Select one column                                 18995          19058         
 99          0.1       18995.0       3.1X
+count()                                            5301           5332         
 30          0.2        5300.9      11.0X
+Select 100 columns, one bad input field           39736          40153         
361          0.0       39736.1       1.5X
+Select 100 columns, corrupt record field          47195          47826         
590          0.0       47195.2       1.2X
 
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Count a dataset with 10 columns:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns + count()                       22389          22447         
 87          0.4        2238.9       1.0X
-Select 1 column + count()                         14844          14890         
 43          0.7        1484.4       1.5X
-count()                                            5519           5538         
 18          1.8         551.9       4.1X
+Select 10 columns + count()                        9884           9904         
 25          1.0         988.4       1.0X
+Select 1 column + count()                          6794           6835         
 46          1.5         679.4       1.5X
+count()                                            2060           2065         
  5          4.9         206.0       4.8X
 
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     1949           1977         
 25          5.1         194.9       1.0X
-to_csv(timestamp)                                 14944          15702         
714          0.7        1494.4       0.1X
-write timestamps to files                         12983          12998         
 14          0.8        1298.3       0.2X
-Create a dataset of dates                          2156           2164         
  7          4.6         215.6       0.9X
-to_csv(date)                                       9675           9709         
 41          1.0         967.5       0.2X
-write dates to files                               7880           7897         
 15          1.3         788.0       0.2X
+Create a dataset of timestamps                      717            732         
 18         14.0          71.7       1.0X
+to_csv(timestamp)                                  6994           7100         
121          1.4         699.4       0.1X
+write timestamps to files                          6417           6435         
 27          1.6         641.7       0.1X
+Create a dataset of dates                           827            855         
 24         12.1          82.7       0.9X
+to_csv(date)                                       4408           4438         
 32          2.3         440.8       0.2X
+write dates to files                               3738           3758         
 28          2.7         373.8       0.2X
 
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     2235           2245         
 10          4.5         223.5       1.0X
-read timestamps from files                        54490          54690         
283          0.2        5449.0       0.0X
-infer timestamps from files                      104501         104737         
236          0.1       10450.1       0.0X
-read date text from files                          2035           2040         
  6          4.9         203.5       1.1X
-read date from files                              39650          39707         
 52          0.3        3965.0       0.1X
-infer date from files                             29235          29363         
164          0.3        2923.5       0.1X
-timestamp strings                                  3412           3426         
 18          2.9         341.2       0.7X
-parse timestamps from Dataset[String]             66864          67804         
981          0.1        6686.4       0.0X
-infer timestamps from Dataset[String]            118780         119284         
837          0.1       11878.0       0.0X
-date strings                                       3730           3734         
  4          2.7         373.0       0.6X
-parse dates from Dataset[String]                  48728          49071         
309          0.2        4872.8       0.0X
-from_csv(timestamp)                               62294          62493         
260          0.2        6229.4       0.0X
-from_csv(date)                                    44581          44665         
117          0.2        4458.1       0.1X
+read timestamp text from files                     1121           1176         
 52          8.9         112.1       1.0X
+read timestamps from files                        21298          21366         
105          0.5        2129.8       0.1X
+infer timestamps from files                       41008          41051         
 39          0.2        4100.8       0.0X
+read date text from files                           962            967         
  5         10.4          96.2       1.2X
+read date from files                              11749          11772         
 22          0.9        1174.9       0.1X
+infer date from files                             12426          12459         
 29          0.8        1242.6       0.1X
+timestamp strings                                  1508           1519         
  9          6.6         150.8       0.7X
+parse timestamps from Dataset[String]             21674          21997         
455          0.5        2167.4       0.1X
+infer timestamps from Dataset[String]             42141          42230         
105          0.2        4214.1       0.0X
+date strings                                       1694           1701         
  8          5.9         169.4       0.7X
+parse dates from Dataset[String]                  12929          12951         
 25          0.8        1292.9       0.1X
+from_csv(timestamp)                               20603          20786         
166          0.5        2060.3       0.1X
+from_csv(date)                                    12325          12338         
 12          0.8        1232.5       0.1X
 
-Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.2
-Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Filters pushdown:                         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-w/o filters                                       12557          12634         
 78          0.0      125572.9       1.0X
-pushdown disabled                                 12449          12509         
 65          0.0      124486.4       1.0X
-w/ filters                                         1372           1393         
 18          0.1       13724.8       9.1X
+w/o filters                                       12455          12474         
 22          0.0      124553.8       1.0X
+pushdown disabled                                 12462          12486         
 29          0.0      124624.9       1.0X
+w/ filters                                         1073           1092         
 18          0.1       10727.6      11.6X
 
 
diff --git a/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt 
b/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
index 920e0a7..03bc334 100644
--- a/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
@@ -3,110 +3,110 @@ Benchmark for performance of JSON parsing
 
================================================================================================
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       84774          84927         
264          1.2         847.7       1.0X
-UTF-8 is set                                     119081         120155        
1773          0.8        1190.8       0.7X
+No encoding                                       46010          46118         
113          2.2         460.1       1.0X
+UTF-8 is set                                      54407          55427        
1718          1.8         544.1       0.8X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       49293          49356         
 70          2.0         492.9       1.0X
-UTF-8 is set                                      80183          80211         
 25          1.2         801.8       0.6X
+No encoding                                       26614          28220        
1461          3.8         266.1       1.0X
+UTF-8 is set                                      42765          43400         
550          2.3         427.6       0.6X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       61070          61476         
536          0.2        6107.0       1.0X
-UTF-8 is set                                     109765         109881         
102          0.1       10976.5       0.6X
+No encoding                                       35696          35821         
113          0.3        3569.6       1.0X
+UTF-8 is set                                      55441          56176        
1037          0.2        5544.1       0.6X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                      176999         178163        
1008          0.0      353997.9       1.0X
-UTF-8 is set                                     201209         201641         
614          0.0      402419.0       0.9X
+No encoding                                       61514          62968         
NaN          0.0      123027.2       1.0X
+UTF-8 is set                                      72096          72933        
1162          0.0      144192.7       0.9X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                 18768          20587         
496          0.5        1876.8       1.0X
-Select 1 column                                   22642          22644         
  3          0.4        2264.2       0.8X
+Select 10 columns                                  9859           9913         
 79          1.0         985.9       1.0X
+Select 1 column                                   10981          11003         
 36          0.9        1098.1       0.9X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                      7697           7738         
 55          1.3         769.7       1.0X
-Short column with UTF-8                           14051          14189         
176          0.7        1405.1       0.5X
-Wide column without encoding                     108999         110075        
1085          0.1       10899.9       0.1X
-Wide column with UTF-8                           157433         157779         
308          0.1       15743.3       0.0X
+Short column without encoding                      3555           3579         
 27          2.8         355.5       1.0X
+Short column with UTF-8                            5204           5227         
 35          1.9         520.4       0.7X
+Wide column without encoding                      60458          60637         
164          0.2        6045.8       0.1X
+Wide column with UTF-8                            77544          78111         
551          0.1        7754.4       0.0X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                           644            647         
  4         15.5          64.4       1.0X
-from_json                                         25859          25872         
 12          0.4        2585.9       0.0X
-json_tuple                                        31679          31761         
 71          0.3        3167.9       0.0X
-get_json_object                                   24772          25220         
389          0.4        2477.2       0.0X
+Text read                                           342            346         
  3         29.2          34.2       1.0X
+from_json                                          7123           7318         
179          1.4         712.3       0.0X
+json_tuple                                         9843           9957         
132          1.0         984.3       0.0X
+get_json_object                                    7827           8046         
194          1.3         782.7       0.0X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                          3135           3165         
 52         15.9          62.7       1.0X
-schema inferring                                  29383          29389         
 10          1.7         587.7       0.1X
-parsing                                           32623          35183         
NaN          1.5         652.5       0.1X
+Text read                                          1856           1884         
 32         26.9          37.1       1.0X
+schema inferring                                  16734          16900         
153          3.0         334.7       0.1X
+parsing                                           14884          15203         
470          3.4         297.7       0.1X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                         11874          11948         
 82          4.2         237.5       1.0X
-Schema inferring                                  42382          42398         
 23          1.2         847.6       0.3X
-Parsing without charset                           36410          36442         
 54          1.4         728.2       0.3X
-Parsing with UTF-8                                62412          62463         
 48          0.8        1248.2       0.2X
+Text read                                          5932           6148         
228          8.4         118.6       1.0X
+Schema inferring                                  20836          21938        
1086          2.4         416.7       0.3X
+Parsing without charset                           18134          18661         
457          2.8         362.7       0.3X
+Parsing with UTF-8                                27734          28069         
378          1.8         554.7       0.2X
 
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     2191           2209         
 20          4.6         219.1       1.0X
-to_json(timestamp)                                18670          19042         
565          0.5        1867.0       0.1X
-write timestamps to files                         11836          13156         
NaN          0.8        1183.6       0.2X
-Create a dataset of dates                          2321           2351         
 33          4.3         232.1       0.9X
-to_json(date)                                     12703          12726         
 24          0.8        1270.3       0.2X
-write dates to files                               8230           8303         
 76          1.2         823.0       0.3X
+Create a dataset of timestamps                      889            914         
 28         11.2          88.9       1.0X
+to_json(timestamp)                                 7920           8172         
353          1.3         792.0       0.1X
+write timestamps to files                          6726           6822         
129          1.5         672.6       0.1X
+Create a dataset of dates                           953            963         
 12         10.5          95.3       0.9X
+to_json(date)                                      5370           5705         
320          1.9         537.0       0.2X
+write dates to files                               4109           4166         
 52          2.4         410.9       0.2X
 
-OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     2780           2795         
 13          3.6         278.0       1.0X
-read timestamps from files                        37158          37305         
137          0.3        3715.8       0.1X
-infer timestamps from files                       73666          73838         
149          0.1        7366.6       0.0X
-read date text from files                          2597           2609         
 10          3.9         259.7       1.1X
-read date from files                              24439          24501         
 56          0.4        2443.9       0.1X
-timestamp strings                                  3052           3064         
 12          3.3         305.2       0.9X
-parse timestamps from Dataset[String]             43611          43665         
 52          0.2        4361.1       0.1X
-infer timestamps from Dataset[String]             83745          84153         
376          0.1        8374.5       0.0X
-date strings                                       4068           4076         
 10          2.5         406.8       0.7X
-parse dates from Dataset[String]                  34700          34807         
118          0.3        3470.0       0.1X
-from_json(timestamp)                              64074          64124         
 53          0.2        6407.4       0.0X
-from_json(date)                                   52520          52617         
101          0.2        5252.0       0.1X
+read timestamp text from files                     1614           1675         
 55          6.2         161.4       1.0X
+read timestamps from files                        16640          16858         
209          0.6        1664.0       0.1X
+infer timestamps from files                       33239          33388         
227          0.3        3323.9       0.0X
+read date text from files                          1310           1340         
 44          7.6         131.0       1.2X
+read date from files                               9470           9513         
 41          1.1         947.0       0.2X
+timestamp strings                                  1303           1342         
 47          7.7         130.3       1.2X
+parse timestamps from Dataset[String]             17650          18073         
380          0.6        1765.0       0.1X
+infer timestamps from Dataset[String]             32623          34065        
1330          0.3        3262.3       0.0X
+date strings                                       1864           1871         
  7          5.4         186.4       0.9X
+parse dates from Dataset[String]                  10914          11316         
482          0.9        1091.4       0.1X
+from_json(timestamp)                              21102          21990         
929          0.5        2110.2       0.1X
+from_json(date)                                   15275          15961         
598          0.7        1527.5       0.1X
 
 
diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt 
b/sql/core/benchmarks/JsonBenchmark-results.txt
index e435f57..0f188c4 100644
--- a/sql/core/benchmarks/JsonBenchmark-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-results.txt
@@ -3,110 +3,110 @@ Benchmark for performance of JSON parsing
 
================================================================================================
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       61888          61918         
 27          1.6         618.9       1.0X
-UTF-8 is set                                     109057         113663         
NaN          0.9        1090.6       0.6X
+No encoding                                       38998          41002         
NaN          2.6         390.0       1.0X
+UTF-8 is set                                      61231          63282        
1854          1.6         612.3       0.6X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       44517          44535         
 29          2.2         445.2       1.0X
-UTF-8 is set                                      75722          75840         
111          1.3         757.2       0.6X
+No encoding                                       28272          28338         
 70          3.5         282.7       1.0X
+UTF-8 is set                                      58681          62243        
1517          1.7         586.8       0.5X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       63677          64090         
633          0.2        6367.7       1.0X
-UTF-8 is set                                      99424          99615         
185          0.1        9942.4       0.6X
+No encoding                                       44026          51829        
1329          0.2        4402.6       1.0X
+UTF-8 is set                                      65839          68596         
500          0.2        6583.9       0.7X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                      174052         174251         
174          0.0      348104.1       1.0X
-UTF-8 is set                                     189000         189098         
113          0.0      378000.9       0.9X
+No encoding                                       72144          74820         
NaN          0.0      144287.6       1.0X
+UTF-8 is set                                      69571          77888         
NaN          0.0      139142.3       1.0X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                 18387          18473         
142          0.5        1838.7       1.0X
-Select 1 column                                   25560          25571         
 13          0.4        2556.0       0.7X
+Select 10 columns                                  9502           9604         
106          1.1         950.2       1.0X
+Select 1 column                                   11861          11948         
109          0.8        1186.1       0.8X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                      9323           9384         
 58          1.1         932.3       1.0X
-Short column with UTF-8                           14016          14058         
 55          0.7        1401.6       0.7X
-Wide column without encoding                     133258         133532         
382          0.1       13325.8       0.1X
-Wide column with UTF-8                           181212         181283         
 61          0.1       18121.2       0.1X
+Short column without encoding                      3830           3846         
 15          2.6         383.0       1.0X
+Short column with UTF-8                            5538           5543         
  7          1.8         553.8       0.7X
+Wide column without encoding                      66899          69158         
NaN          0.1        6689.9       0.1X
+Wide column with UTF-8                            90052          93235         
NaN          0.1        9005.2       0.0X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                          1168           1174         
  5          8.6         116.8       1.0X
-from_json                                         22604          23571         
883          0.4        2260.4       0.1X
-json_tuple                                        29979          30053         
 91          0.3        2997.9       0.0X
-get_json_object                                   21987          22263         
241          0.5        2198.7       0.1X
+Text read                                           659            674         
 13         15.2          65.9       1.0X
+from_json                                          7676           7943         
405          1.3         767.6       0.1X
+json_tuple                                         9881          10172         
273          1.0         988.1       0.1X
+get_json_object                                    7949           8055         
119          1.3         794.9       0.1X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                          5831           5842         
 14          8.6         116.6       1.0X
-schema inferring                                  31372          31456         
 73          1.6         627.4       0.2X
-parsing                                           35911          36191         
254          1.4         718.2       0.2X
+Text read                                          3314           3326         
 17         15.1          66.3       1.0X
+schema inferring                                  16549          17037         
484          3.0         331.0       0.2X
+parsing                                           15138          15283         
172          3.3         302.8       0.2X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                         10249          10314         
 77          4.9         205.0       1.0X
-Schema inferring                                  35403          35436         
 40          1.4         708.1       0.3X
-Parsing without charset                           32875          32879         
  4          1.5         657.5       0.3X
-Parsing with UTF-8                                53444          53519         
100          0.9        1068.9       0.2X
+Text read                                          5136           5446         
268          9.7         102.7       1.0X
+Schema inferring                                  19864          20568        
1191          2.5         397.3       0.3X
+Parsing without charset                           17535          17888         
329          2.9         350.7       0.3X
+Parsing with UTF-8                                25609          25758         
218          2.0         512.2       0.2X
 
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     1909           1924         
 17          5.2         190.9       1.0X
-to_json(timestamp)                                18956          19122         
208          0.5        1895.6       0.1X
-write timestamps to files                         13446          13472         
 43          0.7        1344.6       0.1X
-Create a dataset of dates                          2180           2200         
 28          4.6         218.0       0.9X
-to_json(date)                                     12780          12899         
109          0.8        1278.0       0.1X
-write dates to files                               7835           7865         
 29          1.3         783.5       0.2X
+Create a dataset of timestamps                      784            790         
  7         12.8          78.4       1.0X
+to_json(timestamp)                                 8005           8055         
 50          1.2         800.5       0.1X
+write timestamps to files                          6515           6559         
 45          1.5         651.5       0.1X
+Create a dataset of dates                           854            881         
 24         11.7          85.4       0.9X
+to_json(date)                                      5187           5194         
  7          1.9         518.7       0.2X
+write dates to files                               3663           3684         
 22          2.7         366.3       0.2X
 
-OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 
4.15.0-1044-aws
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.4
+Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
 Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     2467           2477         
  9          4.1         246.7       1.0X
-read timestamps from files                        40186          40342         
135          0.2        4018.6       0.1X
-infer timestamps from files                       82005          82079         
 71          0.1        8200.5       0.0X
-read date text from files                          2243           2264         
 22          4.5         224.3       1.1X
-read date from files                              24852          24863         
 19          0.4        2485.2       0.1X
-timestamp strings                                  3836           3854         
 16          2.6         383.6       0.6X
-parse timestamps from Dataset[String]             51521          51697         
242          0.2        5152.1       0.0X
-infer timestamps from Dataset[String]             97300          97398         
133          0.1        9730.0       0.0X
-date strings                                       4488           4491         
  5          2.2         448.8       0.5X
-parse dates from Dataset[String]                  37918          37976         
 68          0.3        3791.8       0.1X
-from_json(timestamp)                              69611          69632         
 36          0.1        6961.1       0.0X
-from_json(date)                                   56598          56974         
347          0.2        5659.8       0.0X
+read timestamp text from files                     1297           1316         
 26          7.7         129.7       1.0X
+read timestamps from files                        16915          17723         
963          0.6        1691.5       0.1X
+infer timestamps from files                       33967          34304         
360          0.3        3396.7       0.0X
+read date text from files                          1095           1100         
  7          9.1         109.5       1.2X
+read date from files                               8376           8513         
209          1.2         837.6       0.2X
+timestamp strings                                  1807           1816         
  8          5.5         180.7       0.7X
+parse timestamps from Dataset[String]             18189          18242         
 74          0.5        1818.9       0.1X
+infer timestamps from Dataset[String]             37906          38547         
571          0.3        3790.6       0.0X
+date strings                                       2191           2194         
  4          4.6         219.1       0.6X
+parse dates from Dataset[String]                  11593          11625         
 33          0.9        1159.3       0.1X
+from_json(timestamp)                              22589          22650         
101          0.4        2258.9       0.1X
+from_json(date)                                   16479          16619         
159          0.6        1647.9       0.1X
 
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
index 89fb4d5..b9e0d50 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
@@ -212,4 +212,16 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession {
       assert(readback(0).getAs[Row](0).getAs[Date](0).getTime >= 0)
     }
   }
+
+  test("optional datetime parser does not affect csv time formatting") {
+    val s = "2015-08-26 12:34:46"
+    def toDF(p: String): DataFrame = sql(
+      s"""
+         |SELECT
+         | to_csv(
+         |   named_struct('time', timestamp'$s'), map('timestampFormat', "$p")
+         | )
+         | """.stripMargin)
+    checkAnswer(toDF("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), toDF("yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]"))
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 8cc5c22..b989b5d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -688,4 +688,16 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
           options.asJava)),
       Seq(Row("string")))
   }
+
+  test("optional datetime parser does not affect json time formatting") {
+    val s = "2015-08-26 12:34:46"
+    def toDF(p: String): DataFrame = sql(
+      s"""
+         |SELECT
+         | to_json(
+         |   named_struct('time', timestamp'$s'), map('timestampFormat', "$p")
+         | )
+         | """.stripMargin)
+    checkAnswer(toDF("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), toDF("yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]"))
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala
index e2abb39..53d287b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala
@@ -238,7 +238,9 @@ object CSVBenchmark extends SqlBasedBenchmark {
 
       def timestampStr: Dataset[String] = {
         spark.range(0, rowsNum, 1, 1).mapPartitions { iter =>
-          iter.map(i => s"1970-01-01T01:02:03.${100 + i % 100}Z")
+          iter.map {
+            i => s"1970-01-01T01:02:03.${i % 200}Z".stripSuffix(".0Z")
+          }
         }.select($"value".as("timestamp")).as[String]
       }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
index bcecacc..5693088 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
@@ -445,7 +445,9 @@ object JsonBenchmark extends SqlBasedBenchmark {
 
       def timestampStr: Dataset[String] = {
         spark.range(0, rowsNum, 1, 1).mapPartitions { iter =>
-          iter.map(i => s"""{"timestamp":"1970-01-01T01:02:03.${100 + i % 100}Z"}""")
+          iter.map { i =>
+            s"""{"timestamp":"1970-01-01T01:02:03.${i % 200}Z"}""".stripSuffix(".0Z")
+          }
         }.select($"value".as("timestamp")).as[String]
       }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to