[ https://issues.apache.org/jira/browse/SPARK-17733?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Josh Rosen updated SPARK-17733: ------------------------------- Attachment: SparkSubmit-2016-09-29-1_snapshot___Users_joshrosen_Snapshots__-_YourKit_Java_Profiler_2013_build_13088_-_64-bit.png > InferFiltersFromConstraints rule never terminates for query > ----------------------------------------------------------- > > Key: SPARK-17733 > URL: https://issues.apache.org/jira/browse/SPARK-17733 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.0.0 > Reporter: Josh Rosen > Priority: Critical > Attachments: > SparkSubmit-2016-09-29-1_snapshot___Users_joshrosen_Snapshots__-_YourKit_Java_Profiler_2013_build_13088_-_64-bit.png > > > The following (complicated) example becomes stuck in the > {{InferFiltersFromConstraints}} rule and never runs. However, it doesn't fail > with a stack overflow and doesn't hit the limit on optimization passes, so I > think there's some sort of non-obvious infinite loop within the rule itself. > {code:title=Table Creation|borderStyle=solid} > -- Query #0 > CREATE TEMPORARY VIEW table_4(float_col_1, boolean_col_2, decimal2610_col_3, > boolean_col_4, timestamp_col_5, boolean_col_6, bigint_col_7, timestamp_col_8) > AS VALUES > (CAST(21.920416 AS FLOAT), false, -182.0700000000BD, true, > TIMESTAMP('1996-10-24 00:00:00.0'), true, CAST(-993 AS BIGINT), > TIMESTAMP('2007-01-13 00:00:00.0')), > (CAST(722.4906 AS FLOAT), true, 497.5400000000BD, true, > TIMESTAMP('2015-12-14 00:00:00.0'), false, CAST(268 AS BIGINT), > TIMESTAMP('2021-04-19 00:00:00.0')), > (CAST(534.9996 AS FLOAT), true, -470.8300000000BD, true, > TIMESTAMP('1996-01-31 00:00:00.0'), false, CAST(-910 AS BIGINT), > TIMESTAMP('2019-10-16 00:00:00.0')), > (CAST(-289.6454 AS FLOAT), false, 892.2500000000BD, false, > TIMESTAMP('2014-03-14 00:00:00.0'), false, CAST(-462 AS BIGINT), CAST(NULL AS > TIMESTAMP)), > (CAST(46.395535 AS FLOAT), true, -662.8900000000BD, true, > TIMESTAMP('2000-10-16 00:00:00.0'), false, CAST(-656 AS BIGINT), > TIMESTAMP('2024-09-01 00:00:00.0')), > (CAST(-555.36285 AS FLOAT), true, -938.9300000000BD, true, > TIMESTAMP('2007-04-10 00:00:00.0'), true, CAST(252 AS BIGINT), > TIMESTAMP('2028-12-03 00:00:00.0')), > (CAST(826.29004 AS FLOAT), true, 53.1800000000BD, false, > TIMESTAMP('2004-06-11 00:00:00.0'), false, CAST(437 AS BIGINT), > TIMESTAMP('1994-04-04 00:00:00.0')), > (CAST(-15.276999 AS FLOAT), CAST(NULL AS BOOLEAN), -889.3100000000BD, true, > TIMESTAMP('1991-05-23 00:00:00.0'), true, CAST(226 AS BIGINT), > TIMESTAMP('2023-07-08 00:00:00.0')), > (CAST(385.27386 AS FLOAT), CAST(NULL AS BOOLEAN), -9.9500000000BD, false, > TIMESTAMP('2022-10-22 00:00:00.0'), true, CAST(430 AS BIGINT), > TIMESTAMP('2013-09-29 00:00:00.0')), > (CAST(988.7868 AS FLOAT), CAST(NULL AS BOOLEAN), 715.1700000000BD, false, > TIMESTAMP('2026-10-03 00:00:00.0'), true, CAST(-696 AS BIGINT), > TIMESTAMP('1990-08-10 00:00:00.0')) > ; > -- Query #1 > CREATE TEMPORARY VIEW table_1(double_col_1, boolean_col_2, timestamp_col_3, > smallint_col_4, boolean_col_5, int_col_6, timestamp_col_7, varchar0008_col_8, > int_col_9, string_col_10) AS VALUES > (CAST(-147.818640624 AS DOUBLE), CAST(NULL AS BOOLEAN), > TIMESTAMP('2012-10-19 00:00:00.0'), CAST(9 AS SMALLINT), false, 77, > TIMESTAMP('2014-07-01 00:00:00.0'), '-945', -646, '722'), > (CAST(594.195125271 AS DOUBLE), false, TIMESTAMP('2016-12-04 00:00:00.0'), > CAST(NULL AS SMALLINT), CAST(NULL AS BOOLEAN), CAST(NULL AS INT), > TIMESTAMP('1999-12-26 00:00:00.0'), '250', -861, '55'), > (CAST(-454.171126363 AS DOUBLE), false, TIMESTAMP('2008-12-13 00:00:00.0'), > CAST(NULL AS SMALLINT), false, -783, TIMESTAMP('2010-05-28 00:00:00.0'), > '211', -959, CAST(NULL AS STRING)), > (CAST(437.670945524 AS DOUBLE), true, TIMESTAMP('2011-10-16 00:00:00.0'), > CAST(952 AS SMALLINT), true, 297, TIMESTAMP('2013-01-13 00:00:00.0'), '262', > CAST(NULL AS INT), '936'), > (CAST(-387.226759334 AS DOUBLE), false, TIMESTAMP('2019-10-03 00:00:00.0'), > CAST(-496 AS SMALLINT), CAST(NULL AS BOOLEAN), -925, TIMESTAMP('2028-06-27 > 00:00:00.0'), '-657', 948, '18'), > (CAST(-306.138230875 AS DOUBLE), true, TIMESTAMP('1997-10-07 00:00:00.0'), > CAST(332 AS SMALLINT), false, 744, TIMESTAMP('1990-09-22 00:00:00.0'), > '-345', 566, '-574'), > (CAST(675.402140308 AS DOUBLE), false, TIMESTAMP('2017-06-26 00:00:00.0'), > CAST(972 AS SMALLINT), true, CAST(NULL AS INT), TIMESTAMP('2026-06-10 > 00:00:00.0'), '518', 683, '-320'), > (CAST(734.839647174 AS DOUBLE), true, TIMESTAMP('1995-06-01 00:00:00.0'), > CAST(-792 AS SMALLINT), CAST(NULL AS BOOLEAN), CAST(NULL AS INT), > TIMESTAMP('2021-07-11 00:00:00.0'), '-318', 564, '142'), > (CAST(-836.513475295 AS DOUBLE), true, TIMESTAMP('2027-01-02 00:00:00.0'), > CAST(-446 AS SMALLINT), true, CAST(NULL AS INT), TIMESTAMP('1993-09-01 > 00:00:00.0'), '771', CAST(NULL AS INT), '977'), > (CAST(-768.883638815 AS DOUBLE), false, TIMESTAMP('1994-02-11 00:00:00.0'), > CAST(-244 AS SMALLINT), true, -493, TIMESTAMP('1994-01-02 00:00:00.0'), > '-921', CAST(NULL AS INT), '-409') > ; > -- Query #2 > CREATE TEMPORARY VIEW table_5(float_col_1, varchar0138_col_2, string_col_3, > decimal2211_col_4, float_col_5, string_col_6, timestamp_col_7, > varchar0207_col_8) AS VALUES > (CAST(-885.7606 AS FLOAT), '-740', '680', -929.06000000000BD, CAST(NULL AS > FLOAT), '-915', TIMESTAMP('1994-09-12 00:00:00.0'), CAST(NULL AS STRING)), > (CAST(NULL AS FLOAT), '489', '692', -220.60000000000BD, CAST(939.18964 AS > FLOAT), '-514', CAST(NULL AS TIMESTAMP), '181'), > (CAST(210.7055 AS FLOAT), '44', CAST(NULL AS STRING), -174.70000000000BD, > CAST(760.21045 AS FLOAT), '325', TIMESTAMP('2019-09-25 00:00:00.0'), '505'), > (CAST(952.8074 AS FLOAT), '838', '705', CAST(NULL AS DECIMAL(22,11)), > CAST(NULL AS FLOAT), '-62', TIMESTAMP('2029-05-22 00:00:00.0'), CAST(NULL AS > STRING)), > (CAST(-113.300446 AS FLOAT), '-210', '765', CAST(NULL AS DECIMAL(22,11)), > CAST(-819.2468 AS FLOAT), '-829', CAST(NULL AS TIMESTAMP), '465'), > (CAST(-739.9902 AS FLOAT), '614', '-393', -509.22000000000BD, > CAST(-339.78568 AS FLOAT), '568', TIMESTAMP('2013-05-14 00:00:00.0'), '305'), > (CAST(976.0611 AS FLOAT), '670', '71', 663.23000000000BD, CAST(-685.9362 AS > FLOAT), '42', CAST(NULL AS TIMESTAMP), '150'), > (CAST(NULL AS FLOAT), '302', '-404', -349.42000000000BD, CAST(2.113715 AS > FLOAT), '-703', TIMESTAMP('2003-01-09 00:00:00.0'), '-863'), > (CAST(-40.604317 AS FLOAT), '856', '632', 844.57000000000BD, CAST(-730.8376 > AS FLOAT), '151', TIMESTAMP('2021-05-11 00:00:00.0'), '494'), > (CAST(884.62714 AS FLOAT), '-195', '960', -664.40000000000BD, CAST(374.4844 > AS FLOAT), '814', TIMESTAMP('2006-06-12 00:00:00.0'), '-900') > ; > -- Query #3 > CREATE TEMPORARY VIEW table_2(bigint_col_1, boolean_col_2, double_col_3, > double_col_4, double_col_5, varchar0164_col_6) AS VALUES > (CAST(-374 AS BIGINT), CAST(NULL AS BOOLEAN), CAST(939.626553676 AS > DOUBLE), CAST(-777.275379746 AS DOUBLE), CAST(235.613760023 AS DOUBLE), '86'), > (CAST(324 AS BIGINT), true, CAST(-507.23760783 AS DOUBLE), CAST(NULL AS > DOUBLE), CAST(966.753434439 AS DOUBLE), '304'), > (CAST(882 AS BIGINT), false, CAST(-366.529706229 AS DOUBLE), > CAST(787.000491043 AS DOUBLE), CAST(-331.333188698 AS DOUBLE), '158'), > (CAST(-510 AS BIGINT), CAST(NULL AS BOOLEAN), CAST(-855.344932257 AS > DOUBLE), CAST(-858.167264921 AS DOUBLE), CAST(NULL AS DOUBLE), '-419'), > (CAST(-13 AS BIGINT), false, CAST(589.966987492 AS DOUBLE), CAST(NULL AS > DOUBLE), CAST(-653.515783257 AS DOUBLE), '970'), > (CAST(-361 AS BIGINT), true, CAST(-413.021011259 AS DOUBLE), > CAST(-716.638705947 AS DOUBLE), CAST(-936.480108205 AS DOUBLE), '807'), > (CAST(815 AS BIGINT), true, CAST(-643.690268711 AS DOUBLE), > CAST(-684.206112496 AS DOUBLE), CAST(335.557479371 AS DOUBLE), '-872'), > (CAST(617 AS BIGINT), true, CAST(-93.3806447556 AS DOUBLE), > CAST(-322.66171021 AS DOUBLE), CAST(-951.18299435 AS DOUBLE), '-167'), > (CAST(-876 AS BIGINT), false, CAST(-481.774062168 AS DOUBLE), > CAST(-204.40537387 AS DOUBLE), CAST(224.889845986 AS DOUBLE), '-986'), > (CAST(2 AS BIGINT), false, CAST(462.843898322 AS DOUBLE), > CAST(-9.85549856798 AS DOUBLE), CAST(-549.875829922 AS DOUBLE), '121') > ; > -- Query #4 > CREATE TEMPORARY VIEW table_3(string_col_1, float_col_2, timestamp_col_3, > boolean_col_4, timestamp_col_5, decimal3317_col_6) AS VALUES > ('-450', CAST(-903.6053 AS FLOAT), CAST(NULL AS TIMESTAMP), true, > TIMESTAMP('2020-08-22 00:00:00.0'), -376.39000000000000000BD), > ('698', CAST(402.56534 AS FLOAT), TIMESTAMP('2013-10-13 00:00:00.0'), true, > TIMESTAMP('2012-11-06 00:00:00.0'), -498.81000000000000000BD), > ('139', CAST(-895.7336 AS FLOAT), TIMESTAMP('2018-09-08 00:00:00.0'), true, > TIMESTAMP('2019-03-13 00:00:00.0'), CAST(NULL AS DECIMAL(33,17))), > ('616', CAST(-464.9475 AS FLOAT), TIMESTAMP('2028-05-18 00:00:00.0'), true, > TIMESTAMP('2016-05-22 00:00:00.0'), -109.88000000000000000BD), > ('943', CAST(605.42303 AS FLOAT), TIMESTAMP('1996-08-04 00:00:00.0'), > false, TIMESTAMP('2028-05-18 00:00:00.0'), 201.36000000000000000BD), > ('-764', CAST(-503.56726 AS FLOAT), TIMESTAMP('1990-02-28 00:00:00.0'), > false, CAST(NULL AS TIMESTAMP), 211.25000000000000000BD), > ('-587', CAST(84.67886 AS FLOAT), TIMESTAMP('2013-06-06 00:00:00.0'), true, > TIMESTAMP('2022-05-07 00:00:00.0'), 90.75000000000000000BD), > ('712', CAST(141.08926 AS FLOAT), TIMESTAMP('2001-05-12 00:00:00.0'), true, > TIMESTAMP('2019-11-22 00:00:00.0'), 929.89000000000000000BD), > ('948', CAST(0.74294764 AS FLOAT), TIMESTAMP('2002-06-14 00:00:00.0'), > false, TIMESTAMP('1990-01-13 00:00:00.0'), -100.90000000000000000BD), > ('-201', CAST(366.82578 AS FLOAT), TIMESTAMP('2015-11-28 00:00:00.0'), > false, CAST(NULL AS TIMESTAMP), 196.33000000000000000BD) > ; > {code} > {code:title=Query|borderStyle=solid} > SELECT > t1.int_col_2, > (t1.bigint_col_7) / (t2.double_col_3) AS float_col, > TRIM(t2.varchar0164_col_6) AS char_col > FROM ( > SELECT > COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7) AS int_col, > t1.bigint_col_7, > t2.bigint_col_7 AS int_col_1, > t1.bigint_col_7 AS int_col_2, > COALESCE(t2.bigint_col_7, (COALESCE(t1.bigint_col_7, t2.bigint_col_7, > t2.bigint_col_7)) - (t1.bigint_col_7), MIN(t2.bigint_col_7)) AS int_col_3 > FROM table_4 t1 > INNER JOIN table_4 t2 ON ((t2.timestamp_col_5) = (t1.timestamp_col_8)) AND > ((t2.decimal2610_col_3) = (t1.decimal2610_col_3)) > WHERE > (t1.bigint_col_7) IN (t2.bigint_col_7, t2.bigint_col_7) > GROUP BY > COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7), > t1.bigint_col_7, > t2.bigint_col_7, > t1.bigint_col_7 > HAVING > (MIN(COALESCE(t1.bigint_col_7, t2.bigint_col_7, t2.bigint_col_7))) NOT IN > (423.13, t2.bigint_col_7) > UNION > SELECT > MIN((436) * (927)) OVER (ORDER BY (t2.int_col_6) * (NULL) DESC, > (t1.smallint_col_4) - (t2.int_col_6) DESC ROWS BETWEEN 96 PRECEDING AND 16 > PRECEDING) AS int_col, > (t2.int_col_6) * (NULL) AS decimal_col, > (t1.smallint_col_4) - (t2.int_col_6) AS int_col_1, > LAG((449) * (-157.519107824), 46) OVER (ORDER BY (t2.int_col_6) * (NULL) > DESC, (t1.smallint_col_4) - (t2.int_col_6) DESC) AS float_col, > AVG((669) - (-773)) OVER (ORDER BY (t2.int_col_6) * (NULL) ASC, > (t1.smallint_col_4) - (t2.int_col_6) ASC ROWS BETWEEN CURRENT ROW AND 62 > FOLLOWING) AS float_col_1 > FROM table_1 t1 > INNER JOIN table_1 t2 ON ((t2.smallint_col_4) = (t1.int_col_9)) AND > ((t2.smallint_col_4) = (t1.int_col_6)) > ) t1 > INNER JOIN table_2 t2 ON (((t2.bigint_col_1) = (t1.bigint_col_7)) AND > ((t2.bigint_col_1) = (t1.int_col))) AND ((t2.bigint_col_1) = (t1.int_col_1)) > GROUP BY > t1.int_col_2, > (t1.bigint_col_7) / (t2.double_col_3), > TRIM(t2.varchar0164_col_6) > {code} > I attached YourKit to my Spark process and recorded some stack traces. See > the attached screenshots showing the distribution of time for the hung query. -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org