This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 31e1ea1  [SPARK-32130][SQL][FOLLOWUP] Enable timestamps inference in 
JsonBenchmark
31e1ea1 is described below

commit 31e1ea165f3cbb503b05452b448010e81474dcad
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Thu Jul 2 13:26:57 2020 -0700

    [SPARK-32130][SQL][FOLLOWUP] Enable timestamps inference in JsonBenchmark
    
    ### What changes were proposed in this pull request?
    Set the JSON option `inferTimestamp` to `true` for the cases that measure 
perf of timestamp inference.
    
    ### Why are the changes needed?
    The PR https://github.com/apache/spark/pull/28966 disabled timestamp 
inference by default. As a consequence, some benchmarks don't measure perf of 
timestamp inference from JSON fields. This PR explicitly enables such inference.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    By re-generating results of `JsonBenchmark`.
    
    Closes #28981 from MaxGekk/json-inferTimestamps-disable-by-default-followup.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
    (cherry picked from commit 42f01e314b4874236544cc8b94bef766269385ee)
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../benchmarks/JsonBenchmark-jdk11-results.txt     | 86 +++++++++++-----------
 sql/core/benchmarks/JsonBenchmark-results.txt      | 86 +++++++++++-----------
 .../execution/datasources/json/JsonBenchmark.scala |  4 +-
 3 files changed, 88 insertions(+), 88 deletions(-)

diff --git a/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt 
b/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
index ff37084..2d506f0 100644
--- a/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
@@ -7,106 +7,106 @@ OpenJDK 64-Bit Server VM 
11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-106
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       69219          69342         
116          1.4         692.2       1.0X
-UTF-8 is set                                     143950         143986         
 55          0.7        1439.5       0.5X
+No encoding                                       73307          73400         
141          1.4         733.1       1.0X
+UTF-8 is set                                     143834         143925         
152          0.7        1438.3       0.5X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       57828          57913         
136          1.7         578.3       1.0X
-UTF-8 is set                                      83649          83711         
 60          1.2         836.5       0.7X
+No encoding                                       50894          51065         
292          2.0         508.9       1.0X
+UTF-8 is set                                      98462          99455        
1173          1.0         984.6       0.5X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       64560          65193        
1023          0.2        6456.0       1.0X
-UTF-8 is set                                     102925         103174         
216          0.1       10292.5       0.6X
+No encoding                                       64011          64969        
1001          0.2        6401.1       1.0X
+UTF-8 is set                                     102757         102984         
311          0.1       10275.7       0.6X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                      131002         132316        
1160          0.0      262003.1       1.0X
-UTF-8 is set                                     152128         152371         
332          0.0      304256.5       0.9X
+No encoding                                      132559         133561        
1010          0.0      265117.3       1.0X
+UTF-8 is set                                     151458         152129         
611          0.0      302915.4       0.9X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                 19376          19514         
160          0.5        1937.6       1.0X
-Select 1 column                                   24089          24156         
 58          0.4        2408.9       0.8X
+Select 10 columns                                 21148          21202         
 87          0.5        2114.8       1.0X
+Select 1 column                                   24701          24724         
 21          0.4        2470.1       0.9X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                      8131           8219         
103          1.2         813.1       1.0X
-Short column with UTF-8                           13464          13508         
 44          0.7        1346.4       0.6X
-Wide column without encoding                     108012         108598         
914          0.1       10801.2       0.1X
-Wide column with UTF-8                           150988         151369         
412          0.1       15098.8       0.1X
+Short column without encoding                      6945           6998         
 59          1.4         694.5       1.0X
+Short column with UTF-8                           11510          11569         
 51          0.9        1151.0       0.6X
+Wide column without encoding                      95004          95795         
790          0.1        9500.4       0.1X
+Wide column with UTF-8                           149223         149409         
276          0.1       14922.3       0.0X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                           753            765         
 18         13.3          75.3       1.0X
-from_json                                         23182          23446         
230          0.4        2318.2       0.0X
-json_tuple                                        31129          31304         
181          0.3        3112.9       0.0X
-get_json_object                                   22821          23073         
225          0.4        2282.1       0.0X
+Text read                                           649            652         
  3         15.4          64.9       1.0X
+from_json                                         22284          22393         
 99          0.4        2228.4       0.0X
+json_tuple                                        32310          32824         
484          0.3        3231.0       0.0X
+get_json_object                                   22111          22751         
568          0.5        2211.1       0.0X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                          3078           3101         
 26         16.2          61.6       1.0X
-schema inferring                                  30225          30434         
333          1.7         604.5       0.1X
-parsing                                           32237          32308         
 63          1.6         644.7       0.1X
+Text read                                          2894           2903         
  8         17.3          57.9       1.0X
+schema inferring                                  26724          26785         
 62          1.9         534.5       0.1X
+parsing                                           37502          37632         
131          1.3         750.0       0.1X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                         10835          10900         
 86          4.6         216.7       1.0X
-Schema inferring                                  37720          37805         
110          1.3         754.4       0.3X
-Parsing without charset                           35464          35538         
100          1.4         709.3       0.3X
-Parsing with UTF-8                                67311          67738         
381          0.7        1346.2       0.2X
+Text read                                         10994          11010         
 16          4.5         219.9       1.0X
+Schema inferring                                  45654          45677         
 37          1.1         913.1       0.2X
+Parsing without charset                           34476          34559         
 73          1.5         689.5       0.3X
+Parsing with UTF-8                                56987          57002         
 13          0.9        1139.7       0.2X
 
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     2208           2222         
 14          4.5         220.8       1.0X
-to_json(timestamp)                                14299          14570         
285          0.7        1429.9       0.2X
-write timestamps to files                         12955          12969         
 13          0.8        1295.5       0.2X
-Create a dataset of dates                          2297           2323         
 30          4.4         229.7       1.0X
-to_json(date)                                      8509           8561         
 74          1.2         850.9       0.3X
-write dates to files                               6786           6827         
 45          1.5         678.6       0.3X
+Create a dataset of timestamps                     2150           2188         
 35          4.7         215.0       1.0X
+to_json(timestamp)                                17874          18080         
294          0.6        1787.4       0.1X
+write timestamps to files                         12518          12538         
 34          0.8        1251.8       0.2X
+Create a dataset of dates                          2298           2310         
 18          4.4         229.8       0.9X
+to_json(date)                                     11673          11703         
 27          0.9        1167.3       0.2X
+write dates to files                               7121           7135         
 12          1.4         712.1       0.3X
 
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     2598           2613         
 18          3.8         259.8       1.0X
-read timestamps from files                        42007          42028         
 19          0.2        4200.7       0.1X
-infer timestamps from files                       18102          18120         
 28          0.6        1810.2       0.1X
-read date text from files                          2355           2360         
  5          4.2         235.5       1.1X
-read date from files                              17420          17458         
 33          0.6        1742.0       0.1X
-timestamp strings                                  3099           3101         
  3          3.2         309.9       0.8X
-parse timestamps from Dataset[String]             48188          48215         
 25          0.2        4818.8       0.1X
-infer timestamps from Dataset[String]             22929          22988         
102          0.4        2292.9       0.1X
-date strings                                       4090           4103         
 11          2.4         409.0       0.6X
-parse dates from Dataset[String]                  24952          25068         
139          0.4        2495.2       0.1X
-from_json(timestamp)                              66038          66352         
413          0.2        6603.8       0.0X
-from_json(date)                                   43755          43782         
 27          0.2        4375.5       0.1X
+read timestamp text from files                     2616           2641         
 34          3.8         261.6       1.0X
+read timestamps from files                        37481          37517         
 58          0.3        3748.1       0.1X
+infer timestamps from files                       84774          84964         
201          0.1        8477.4       0.0X
+read date text from files                          2362           2365         
  3          4.2         236.2       1.1X
+read date from files                              16583          16612         
 29          0.6        1658.3       0.2X
+timestamp strings                                  3927           3963         
 40          2.5         392.7       0.7X
+parse timestamps from Dataset[String]             52827          53004         
243          0.2        5282.7       0.0X
+infer timestamps from Dataset[String]            101108         101644         
769          0.1       10110.8       0.0X
+date strings                                       4886           4906         
 26          2.0         488.6       0.5X
+parse dates from Dataset[String]                  27623          27694         
 62          0.4        2762.3       0.1X
+from_json(timestamp)                              71764          71887         
124          0.1        7176.4       0.0X
+from_json(date)                                   46200          46314         
 99          0.2        4620.0       0.1X
 
 
diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt 
b/sql/core/benchmarks/JsonBenchmark-results.txt
index 0e4ce90..c22118f 100644
--- a/sql/core/benchmarks/JsonBenchmark-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-results.txt
@@ -7,106 +7,106 @@ OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on 
Linux 4.15.0-1063-aw
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       64950          65182         
306          1.5         649.5       1.0X
-UTF-8 is set                                     129566         129796         
229          0.8        1295.7       0.5X
+No encoding                                       63839          64000         
263          1.6         638.4       1.0X
+UTF-8 is set                                     124633         124945         
429          0.8        1246.3       0.5X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       50896          51277         
372          2.0         509.0       1.0X
-UTF-8 is set                                      89712          89763         
 49          1.1         897.1       0.6X
+No encoding                                       51720          51901         
157          1.9         517.2       1.0X
+UTF-8 is set                                      91161          91190         
 25          1.1         911.6       0.6X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       59415          59785         
372          0.2        5941.5       1.0X
-UTF-8 is set                                     103059         103165         
156          0.1       10305.9       0.6X
+No encoding                                       58486          59038         
714          0.2        5848.6       1.0X
+UTF-8 is set                                     103045         103350         
358          0.1       10304.5       0.6X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                      132951         133122         
288          0.0      265901.9       1.0X
-UTF-8 is set                                     149318         149441         
107          0.0      298635.3       0.9X
+No encoding                                      134909         135024         
105          0.0      269818.6       1.0X
+UTF-8 is set                                     154418         154593         
155          0.0      308836.7       0.9X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                 18491          18552         
 85          0.5        1849.1       1.0X
-Select 1 column                                   25908          25946         
 65          0.4        2590.8       0.7X
+Select 10 columns                                 19538          19620         
 70          0.5        1953.8       1.0X
+Select 1 column                                   26142          26159         
 15          0.4        2614.2       0.7X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                      9264           9307         
 49          1.1         926.4       1.0X
-Short column with UTF-8                           14707          14727         
 17          0.7        1470.7       0.6X
-Wide column without encoding                     141138         141347         
276          0.1       14113.8       0.1X
-Wide column with UTF-8                           179601         180035         
664          0.1       17960.1       0.1X
+Short column without encoding                      8103           8162         
 53          1.2         810.3       1.0X
+Short column with UTF-8                           13104          13150         
 58          0.8        1310.4       0.6X
+Wide column without encoding                     135280         135593         
375          0.1       13528.0       0.1X
+Wide column with UTF-8                           175189         175483         
278          0.1       17518.9       0.0X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                          1173           1184         
  9          8.5         117.3       1.0X
-from_json                                         23432          23738         
338          0.4        2343.2       0.1X
-json_tuple                                        32573          32851         
358          0.3        3257.3       0.0X
-get_json_object                                   22442          22489         
 47          0.4        2244.2       0.1X
+Text read                                          1225           1234         
  8          8.2         122.5       1.0X
+from_json                                         22482          22552         
 95          0.4        2248.2       0.1X
+json_tuple                                        30203          30338         
146          0.3        3020.3       0.0X
+get_json_object                                   22219          22245         
 26          0.5        2221.9       0.1X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                          5656           5680         
 31          8.8         113.1       1.0X
-schema inferring                                  33283          33337         
 64          1.5         665.7       0.2X
-parsing                                           41771          41929         
178          1.2         835.4       0.1X
+Text read                                          5897           5904         
 10          8.5         117.9       1.0X
+schema inferring                                  30282          30340         
 50          1.7         605.6       0.2X
+parsing                                           33304          33577         
289          1.5         666.1       0.2X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                          9626           9668         
 39          5.2         192.5       1.0X
-Schema inferring                                  39489          39579         
 91          1.3         789.8       0.2X
-Parsing without charset                           38096          38232         
125          1.3         761.9       0.3X
-Parsing with UTF-8                                64565          64725         
165          0.8        1291.3       0.1X
+Text read                                          9710           9757         
 80          5.1         194.2       1.0X
+Schema inferring                                  35929          35939         
  9          1.4         718.6       0.3X
+Parsing without charset                           39175          39227         
 87          1.3         783.5       0.2X
+Parsing with UTF-8                                59188          59294         
109          0.8        1183.8       0.2X
 
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     1898           1912         
 13          5.3         189.8       1.0X
-to_json(timestamp)                                20011          20092         
119          0.5        2001.1       0.1X
-write timestamps to files                         13388          13427         
 35          0.7        1338.8       0.1X
-Create a dataset of dates                          2351           2368         
 18          4.3         235.1       0.8X
-to_json(date)                                     11884          11913         
 40          0.8        1188.4       0.2X
-write dates to files                               7317           7326         
  9          1.4         731.7       0.3X
+Create a dataset of timestamps                     1967           1977         
  9          5.1         196.7       1.0X
+to_json(timestamp)                                17086          17304         
371          0.6        1708.6       0.1X
+write timestamps to files                         12691          12716         
 28          0.8        1269.1       0.2X
+Create a dataset of dates                          2192           2217         
 39          4.6         219.2       0.9X
+to_json(date)                                     10541          10656         
137          0.9        1054.1       0.2X
+write dates to files                               7259           7311         
 46          1.4         725.9       0.3X
 
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     2316           2324         
 13          4.3         231.6       1.0X
-read timestamps from files                        43712          43900         
165          0.2        4371.2       0.1X
-infer timestamps from files                       19302          19328         
 38          0.5        1930.2       0.1X
-read date text from files                          2090           2099         
 11          4.8         209.0       1.1X
-read date from files                              18914          18940         
 44          0.5        1891.4       0.1X
-timestamp strings                                  3785           3793         
 11          2.6         378.5       0.6X
-parse timestamps from Dataset[String]             51177          51353         
160          0.2        5117.7       0.0X
-infer timestamps from Dataset[String]             27907          28119         
186          0.4        2790.7       0.1X
-date strings                                       4446           4452         
  6          2.2         444.6       0.5X
-parse dates from Dataset[String]                  28124          28172         
 55          0.4        2812.4       0.1X
-from_json(timestamp)                              71432          71827         
354          0.1        7143.2       0.0X
-from_json(date)                                   46497          46651         
163          0.2        4649.7       0.0X
+read timestamp text from files                     2318           2326         
 13          4.3         231.8       1.0X
+read timestamps from files                        43345          43627         
258          0.2        4334.5       0.1X
+infer timestamps from files                       89570          89621         
 59          0.1        8957.0       0.0X
+read date text from files                          2099           2107         
  9          4.8         209.9       1.1X
+read date from files                              18000          18065         
 98          0.6        1800.0       0.1X
+timestamp strings                                  3937           3956         
 32          2.5         393.7       0.6X
+parse timestamps from Dataset[String]             56001          56429         
539          0.2        5600.1       0.0X
+infer timestamps from Dataset[String]            109410         109963         
559          0.1       10941.0       0.0X
+date strings                                       4530           4540         
  9          2.2         453.0       0.5X
+parse dates from Dataset[String]                  29723          29767         
 72          0.3        2972.3       0.1X
+from_json(timestamp)                              74106          74619         
728          0.1        7410.6       0.0X
+from_json(date)                                   46599          46632         
 32          0.2        4659.9       0.0X
 
 
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
index 5693088..0dbd6b5 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
@@ -430,7 +430,7 @@ object JsonBenchmark extends SqlBasedBenchmark {
       }
 
       readBench.addCase("infer timestamps from files", numIters) { _ =>
-        spark.read.json(timestampDir).noop()
+        spark.read.option("inferTimestamp", true).json(timestampDir).noop()
       }
 
       val dateSchema = new StructType().add("date", DateType)
@@ -460,7 +460,7 @@ object JsonBenchmark extends SqlBasedBenchmark {
       }
 
       readBench.addCase("infer timestamps from Dataset[String]", numIters) { _ 
=>
-        spark.read.json(timestampStr).noop()
+        spark.read.option("inferTimestamp", true).json(timestampStr).noop()
       }
 
       def dateStr: Dataset[String] = {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to