[ 
https://issues.apache.org/jira/browse/SPARK-17733?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Josh Rosen updated SPARK-17733:
-------------------------------
    Attachment: 
SparkSubmit-2016-09-29-1_snapshot___Users_joshrosen_Snapshots__-_YourKit_Java_Profiler_2013_build_13088_-_64-bit.png

> InferFiltersFromConstraints rule never terminates for query
> -----------------------------------------------------------
>
>                 Key: SPARK-17733
>                 URL: https://issues.apache.org/jira/browse/SPARK-17733
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.0.0
>            Reporter: Josh Rosen
>            Priority: Critical
>         Attachments: 
> SparkSubmit-2016-09-29-1_snapshot___Users_joshrosen_Snapshots__-_YourKit_Java_Profiler_2013_build_13088_-_64-bit.png
>
>
> The following (complicated) example becomes stuck in the 
> {{InferFiltersFromConstraints}} rule and never runs. However, it doesn't fail 
> with a stack overflow and doesn't hit the limit on optimization passes, so I 
> think there's some sort of non-obvious infinite loop within the rule itself.
> {code:title=Table Creation|borderStyle=solid}
>  -- Query #0
> CREATE TEMPORARY VIEW table_4(float_col_1, boolean_col_2, decimal2610_col_3, 
> boolean_col_4, timestamp_col_5, boolean_col_6, bigint_col_7, timestamp_col_8) 
> AS VALUES
>   (CAST(21.920416 AS FLOAT), false, -182.0700000000BD, true, 
> TIMESTAMP('1996-10-24 00:00:00.0'), true, CAST(-993 AS BIGINT), 
> TIMESTAMP('2007-01-13 00:00:00.0')),
>   (CAST(722.4906 AS FLOAT), true, 497.5400000000BD, true, 
> TIMESTAMP('2015-12-14 00:00:00.0'), false, CAST(268 AS BIGINT), 
> TIMESTAMP('2021-04-19 00:00:00.0')),
>   (CAST(534.9996 AS FLOAT), true, -470.8300000000BD, true, 
> TIMESTAMP('1996-01-31 00:00:00.0'), false, CAST(-910 AS BIGINT), 
> TIMESTAMP('2019-10-16 00:00:00.0')),
>   (CAST(-289.6454 AS FLOAT), false, 892.2500000000BD, false, 
> TIMESTAMP('2014-03-14 00:00:00.0'), false, CAST(-462 AS BIGINT), CAST(NULL AS 
> TIMESTAMP)),
>   (CAST(46.395535 AS FLOAT), true, -662.8900000000BD, true, 
> TIMESTAMP('2000-10-16 00:00:00.0'), false, CAST(-656 AS BIGINT), 
> TIMESTAMP('2024-09-01 00:00:00.0')),
>   (CAST(-555.36285 AS FLOAT), true, -938.9300000000BD, true, 
> TIMESTAMP('2007-04-10 00:00:00.0'), true, CAST(252 AS BIGINT), 
> TIMESTAMP('2028-12-03 00:00:00.0')),
>   (CAST(826.29004 AS FLOAT), true, 53.1800000000BD, false, 
> TIMESTAMP('2004-06-11 00:00:00.0'), false, CAST(437 AS BIGINT), 
> TIMESTAMP('1994-04-04 00:00:00.0')),
>   (CAST(-15.276999 AS FLOAT), CAST(NULL AS BOOLEAN), -889.3100000000BD, true, 
> TIMESTAMP('1991-05-23 00:00:00.0'), true, CAST(226 AS BIGINT), 
> TIMESTAMP('2023-07-08 00:00:00.0')),
>   (CAST(385.27386 AS FLOAT), CAST(NULL AS BOOLEAN), -9.9500000000BD, false, 
> TIMESTAMP('2022-10-22 00:00:00.0'), true, CAST(430 AS BIGINT), 
> TIMESTAMP('2013-09-29 00:00:00.0')),
>   (CAST(988.7868 AS FLOAT), CAST(NULL AS BOOLEAN), 715.1700000000BD, false, 
> TIMESTAMP('2026-10-03 00:00:00.0'), true, CAST(-696 AS BIGINT), 
> TIMESTAMP('1990-08-10 00:00:00.0'))
>      ;
>  -- Query #1
> CREATE TEMPORARY VIEW table_1(double_col_1, boolean_col_2, timestamp_col_3, 
> smallint_col_4, boolean_col_5, int_col_6, timestamp_col_7, varchar0008_col_8, 
> int_col_9, string_col_10) AS VALUES
>   (CAST(-147.818640624 AS DOUBLE), CAST(NULL AS BOOLEAN), 
> TIMESTAMP('2012-10-19 00:00:00.0'), CAST(9 AS SMALLINT), false, 77, 
> TIMESTAMP('2014-07-01 00:00:00.0'), '-945', -646, '722'),
>   (CAST(594.195125271 AS DOUBLE), false, TIMESTAMP('2016-12-04 00:00:00.0'), 
> CAST(NULL AS SMALLINT), CAST(NULL AS BOOLEAN), CAST(NULL AS INT), 
> TIMESTAMP('1999-12-26 00:00:00.0'), '250', -861, '55'),
>   (CAST(-454.171126363 AS DOUBLE), false, TIMESTAMP('2008-12-13 00:00:00.0'), 
> CAST(NULL AS SMALLINT), false, -783, TIMESTAMP('2010-05-28 00:00:00.0'), 
> '211', -959, CAST(NULL AS STRING)),
>   (CAST(437.670945524 AS DOUBLE), true, TIMESTAMP('2011-10-16 00:00:00.0'), 
> CAST(952 AS SMALLINT), true, 297, TIMESTAMP('2013-01-13 00:00:00.0'), '262', 
> CAST(NULL AS INT), '936'),
>   (CAST(-387.226759334 AS DOUBLE), false, TIMESTAMP('2019-10-03 00:00:00.0'), 
> CAST(-496 AS SMALLINT), CAST(NULL AS BOOLEAN), -925, TIMESTAMP('2028-06-27 
> 00:00:00.0'), '-657', 948, '18'),
>   (CAST(-306.138230875 AS DOUBLE), true, TIMESTAMP('1997-10-07 00:00:00.0'), 
> CAST(332 AS SMALLINT), false, 744, TIMESTAMP('1990-09-22 00:00:00.0'), 
> '-345', 566, '-574'),
>   (CAST(675.402140308 AS DOUBLE), false, TIMESTAMP('2017-06-26 00:00:00.0'), 
> CAST(972 AS SMALLINT), true, CAST(NULL AS INT), TIMESTAMP('2026-06-10 
> 00:00:00.0'), '518', 683, '-320'),
>   (CAST(734.839647174 AS DOUBLE), true, TIMESTAMP('1995-06-01 00:00:00.0'), 
> CAST(-792 AS SMALLINT), CAST(NULL AS BOOLEAN), CAST(NULL AS INT), 
> TIMESTAMP('2021-07-11 00:00:00.0'), '-318', 564, '142'),
>   (CAST(-836.513475295 AS DOUBLE), true, TIMESTAMP('2027-01-02 00:00:00.0'), 
> CAST(-446 AS SMALLINT), true, CAST(NULL AS INT), TIMESTAMP('1993-09-01 
> 00:00:00.0'), '771', CAST(NULL AS INT), '977'),
>   (CAST(-768.883638815 AS DOUBLE), false, TIMESTAMP('1994-02-11 00:00:00.0'), 
> CAST(-244 AS SMALLINT), true, -493, TIMESTAMP('1994-01-02 00:00:00.0'), 
> '-921', CAST(NULL AS INT), '-409')
>      ;
>  -- Query #2
> CREATE TEMPORARY VIEW table_5(float_col_1, varchar0138_col_2, string_col_3, 
> decimal2211_col_4, float_col_5, string_col_6, timestamp_col_7, 
> varchar0207_col_8) AS VALUES
>   (CAST(-885.7606 AS FLOAT), '-740', '680', -929.06000000000BD, CAST(NULL AS 
> FLOAT), '-915', TIMESTAMP('1994-09-12 00:00:00.0'), CAST(NULL AS STRING)),
>   (CAST(NULL AS FLOAT), '489', '692', -220.60000000000BD, CAST(939.18964 AS 
> FLOAT), '-514', CAST(NULL AS TIMESTAMP), '181'),
>   (CAST(210.7055 AS FLOAT), '44', CAST(NULL AS STRING), -174.70000000000BD, 
> CAST(760.21045 AS FLOAT), '325', TIMESTAMP('2019-09-25 00:00:00.0'), '505'),
>   (CAST(952.8074 AS FLOAT), '838', '705', CAST(NULL AS DECIMAL(22,11)), 
> CAST(NULL AS FLOAT), '-62', TIMESTAMP('2029-05-22 00:00:00.0'), CAST(NULL AS 
> STRING)),
>   (CAST(-113.300446 AS FLOAT), '-210', '765', CAST(NULL AS DECIMAL(22,11)), 
> CAST(-819.2468 AS FLOAT), '-829', CAST(NULL AS TIMESTAMP), '465'),
>   (CAST(-739.9902 AS FLOAT), '614', '-393', -509.22000000000BD, 
> CAST(-339.78568 AS FLOAT), '568', TIMESTAMP('2013-05-14 00:00:00.0'), '305'),
>   (CAST(976.0611 AS FLOAT), '670', '71', 663.23000000000BD, CAST(-685.9362 AS 
> FLOAT), '42', CAST(NULL AS TIMESTAMP), '150'),
>   (CAST(NULL AS FLOAT), '302', '-404', -349.42000000000BD, CAST(2.113715 AS 
> FLOAT), '-703', TIMESTAMP('2003-01-09 00:00:00.0'), '-863'),
>   (CAST(-40.604317 AS FLOAT), '856', '632', 844.57000000000BD, CAST(-730.8376 
> AS FLOAT), '151', TIMESTAMP('2021-05-11 00:00:00.0'), '494'),
>   (CAST(884.62714 AS FLOAT), '-195', '960', -664.40000000000BD, CAST(374.4844 
> AS FLOAT), '814', TIMESTAMP('2006-06-12 00:00:00.0'), '-900')
>      ;
>  -- Query #3
> CREATE TEMPORARY VIEW table_2(bigint_col_1, boolean_col_2, double_col_3, 
> double_col_4, double_col_5, varchar0164_col_6) AS VALUES
>   (CAST(-374 AS BIGINT), CAST(NULL AS BOOLEAN), CAST(939.626553676 AS 
> DOUBLE), CAST(-777.275379746 AS DOUBLE), CAST(235.613760023 AS DOUBLE), '86'),
>   (CAST(324 AS BIGINT), true, CAST(-507.23760783 AS DOUBLE), CAST(NULL AS 
> DOUBLE), CAST(966.753434439 AS DOUBLE), '304'),
>   (CAST(882 AS BIGINT), false, CAST(-366.529706229 AS DOUBLE), 
> CAST(787.000491043 AS DOUBLE), CAST(-331.333188698 AS DOUBLE), '158'),
>   (CAST(-510 AS BIGINT), CAST(NULL AS BOOLEAN), CAST(-855.344932257 AS 
> DOUBLE), CAST(-858.167264921 AS DOUBLE), CAST(NULL AS DOUBLE), '-419'),
>   (CAST(-13 AS BIGINT), false, CAST(589.966987492 AS DOUBLE), CAST(NULL AS 
> DOUBLE), CAST(-653.515783257 AS DOUBLE), '970'),
>   (CAST(-361 AS BIGINT), true, CAST(-413.021011259 AS DOUBLE), 
> CAST(-716.638705947 AS DOUBLE), CAST(-936.480108205 AS DOUBLE), '807'),
>   (CAST(815 AS BIGINT), true, CAST(-643.690268711 AS DOUBLE), 
> CAST(-684.206112496 AS DOUBLE), CAST(335.557479371 AS DOUBLE), '-872'),
>   (CAST(617 AS BIGINT), true, CAST(-93.3806447556 AS DOUBLE), 
> CAST(-322.66171021 AS DOUBLE), CAST(-951.18299435 AS DOUBLE), '-167'),
>   (CAST(-876 AS BIGINT), false, CAST(-481.774062168 AS DOUBLE), 
> CAST(-204.40537387 AS DOUBLE), CAST(224.889845986 AS DOUBLE), '-986'),
>   (CAST(2 AS BIGINT), false, CAST(462.843898322 AS DOUBLE), 
> CAST(-9.85549856798 AS DOUBLE), CAST(-549.875829922 AS DOUBLE), '121')
>      ;
>  -- Query #4
> CREATE TEMPORARY VIEW table_3(string_col_1, float_col_2, timestamp_col_3, 
> boolean_col_4, timestamp_col_5, decimal3317_col_6) AS VALUES
>   ('-450', CAST(-903.6053 AS FLOAT), CAST(NULL AS TIMESTAMP), true, 
> TIMESTAMP('2020-08-22 00:00:00.0'), -376.39000000000000000BD),
>   ('698', CAST(402.56534 AS FLOAT), TIMESTAMP('2013-10-13 00:00:00.0'), true, 
> TIMESTAMP('2012-11-06 00:00:00.0'), -498.81000000000000000BD),
>   ('139', CAST(-895.7336 AS FLOAT), TIMESTAMP('2018-09-08 00:00:00.0'), true, 
> TIMESTAMP('2019-03-13 00:00:00.0'), CAST(NULL AS DECIMAL(33,17))),
>   ('616', CAST(-464.9475 AS FLOAT), TIMESTAMP('2028-05-18 00:00:00.0'), true, 
> TIMESTAMP('2016-05-22 00:00:00.0'), -109.88000000000000000BD),
>   ('943', CAST(605.42303 AS FLOAT), TIMESTAMP('1996-08-04 00:00:00.0'), 
> false, TIMESTAMP('2028-05-18 00:00:00.0'), 201.36000000000000000BD),
>   ('-764', CAST(-503.56726 AS FLOAT), TIMESTAMP('1990-02-28 00:00:00.0'), 
> false, CAST(NULL AS TIMESTAMP), 211.25000000000000000BD),
>   ('-587', CAST(84.67886 AS FLOAT), TIMESTAMP('2013-06-06 00:00:00.0'), true, 
> TIMESTAMP('2022-05-07 00:00:00.0'), 90.75000000000000000BD),
>   ('712', CAST(141.08926 AS FLOAT), TIMESTAMP('2001-05-12 00:00:00.0'), true, 
> TIMESTAMP('2019-11-22 00:00:00.0'), 929.89000000000000000BD),
>   ('948', CAST(0.74294764 AS FLOAT), TIMESTAMP('2002-06-14 00:00:00.0'), 
> false, TIMESTAMP('1990-01-13 00:00:00.0'), -100.90000000000000000BD),
>   ('-201', CAST(366.82578 AS FLOAT), TIMESTAMP('2015-11-28 00:00:00.0'), 
> false, CAST(NULL AS TIMESTAMP), 196.33000000000000000BD)
>      ;
> {code}
> {code:title=Query|borderStyle=solid}
> SELECT
> t1.int_col_2,
> (t1.bigint_col_7) / (t2.double_col_3) AS float_col,
> TRIM(t2.varchar0164_col_6) AS char_col
> FROM (
> SELECT
> COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7) AS int_col,
> t1.bigint_col_7,
> t2.bigint_col_7 AS int_col_1,
> t1.bigint_col_7 AS int_col_2,
> COALESCE(t2.bigint_col_7, (COALESCE(t1.bigint_col_7, t2.bigint_col_7, 
> t2.bigint_col_7)) - (t1.bigint_col_7), MIN(t2.bigint_col_7)) AS int_col_3
> FROM table_4 t1
> INNER JOIN table_4 t2 ON ((t2.timestamp_col_5) = (t1.timestamp_col_8)) AND 
> ((t2.decimal2610_col_3) = (t1.decimal2610_col_3))
> WHERE
> (t1.bigint_col_7) IN (t2.bigint_col_7, t2.bigint_col_7)
> GROUP BY
> COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7),
> t1.bigint_col_7,
> t2.bigint_col_7,
> t1.bigint_col_7
> HAVING
> (MIN(COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7))) NOT IN 
> (423.13, t2.bigint_col_7)
> UNION
> SELECT
> MIN((436) * (927)) OVER (ORDER BY (t2.int_col_6) * (NULL) DESC, 
> (t1.smallint_col_4) - (t2.int_col_6) DESC ROWS BETWEEN 96 PRECEDING AND 16 
> PRECEDING) AS int_col,
> (t2.int_col_6) * (NULL) AS decimal_col,
> (t1.smallint_col_4) - (t2.int_col_6) AS int_col_1,
> LAG((449) * (-157.519107824), 46) OVER (ORDER BY (t2.int_col_6) * (NULL) 
> DESC, (t1.smallint_col_4) - (t2.int_col_6) DESC) AS float_col,
> AVG((669) - (-773)) OVER (ORDER BY (t2.int_col_6) * (NULL) ASC, 
> (t1.smallint_col_4) - (t2.int_col_6) ASC ROWS BETWEEN CURRENT ROW AND 62 
> FOLLOWING) AS float_col_1
> FROM table_1 t1
> INNER JOIN table_1 t2 ON ((t2.smallint_col_4) = (t1.int_col_9)) AND 
> ((t2.smallint_col_4) = (t1.int_col_6))
> ) t1
> INNER JOIN table_2 t2 ON (((t2.bigint_col_1) = (t1.bigint_col_7)) AND 
> ((t2.bigint_col_1) = (t1.int_col))) AND ((t2.bigint_col_1) = (t1.int_col_1))
> GROUP BY
> t1.int_col_2,
> (t1.bigint_col_7) / (t2.double_col_3),
> TRIM(t2.varchar0164_col_6)
> {code}
> I attached YourKit to my Spark process and recorded some stack traces. See 
> the attached screenshots showing the distribution of time for the hung query.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to