[
https://issues.apache.org/jira/browse/IMPALA-14863?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Abhishek Rawat updated IMPALA-14863:
------------------------------------
Description:
During many-to-many joins, if {{other join predicates}} containing string
functions (e.g., {{{}UPPER(){}}}) evaluate to {{{}FALSE{}}}, the output batch
never reaches capacity. This traps execution inside the {{ProcessProbeBatch}}
inner loop, preventing the return to {{GetNext()}} where temporary expression
memory is normally freed. Consequently, temporary expression memory grows
infinitely without garbage collection until the {{impalad}} process crashes
from OOM.
Repro Steps:
{code:java}
CREATE TABLE test_left_fact (
product_sk BIGINT,
drv_event_type STRING
);
CREATE TABLE test_right_cte (
product_sk BIGINT,
product_type STRING
);
-- Create a temporary view of 10 numbers
WITH ten AS (
SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION
ALL
SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL
SELECT 8 UNION ALL SELECT 9
)
-- 1. Insert 1,024 rows into the Left Fact table (Exactly 1 Impala Batch)
-- All rows share the same join key (999)
INSERT INTO test_left_fact
SELECT
999 AS product_sk,
'fail_event' AS drv_event_type
FROM ten a CROSS JOIN ten b CROSS JOIN ten c
LIMIT 1024;
-- 2. Insert 1,000,000 rows into the Right CTE table
-- All 1 Million rows share the exact same join key (999)
WITH ten AS (
SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION
ALL
SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL
SELECT 8 UNION ALL SELECT 9
)
INSERT INTO test_right_cte
SELECT
999 AS product_sk,
'wrong_type' AS product_type
FROM ten a
CROSS JOIN ten b
CROSS JOIN ten c
CROSS JOIN ten d
CROSS JOIN ten e
CROSS JOIN ten f;
-- OOM Query
SELECT STRAIGHT_JOIN count(1)
FROM test_left_fact f
LEFT JOIN test_right_cte p2
ON f.product_sk = p2.product_sk
WHERE (
-- Condition 1: Evaluates to FALSE
UPPER(f.drv_event_type) LIKE '%_CONF'
OR
-- Condition 2: The Memory Trap
-- Because Condition 1 is FALSE, Impala MUST evaluate this entire block for
all 1000,000 matches.
-- Every UPPER() and CONCAT() asks the OS for fresh memory.
(
CASE
WHEN UPPER(p2.product_type) = 'COMMODITY-SWAP'
THEN UPPER(CONCAT(f.drv_event_type, '_SWAP_1'))
WHEN UPPER(p2.product_type) = 'METAL-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_METAL_2'))
WHEN UPPER(p2.product_type) = 'ENERGY-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_ENERGY_3'))
WHEN UPPER(p2.product_type) = 'AGRICULTURAL-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_AGRI_4'))
WHEN UPPER(p2.product_type) = 'SOFT-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_SOFT_5'))
WHEN UPPER(p2.product_type) = 'OTHER-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_OTHER_6'))
WHEN UPPER(p2.product_type) = 'METAL-OPTION'
THEN UPPER(CONCAT(f.drv_event_type, '_MOPT_7'))
WHEN UPPER(p2.product_type) = 'ENERGY-OPTION'
THEN UPPER(CONCAT(f.drv_event_type, '_EOPT_8'))
WHEN UPPER(p2.product_type) = 'SPREAD-OPTION'
THEN UPPER(CONCAT(f.drv_event_type, '_SOPT_9'))
WHEN UPPER(p2.product_type) = 'COMMODITY-INDEX'
THEN UPPER(CONCAT(f.drv_event_type, '_INDEX_10'))
-- The fallback generates two more uppercase string allocations
ELSE UPPER(CONCAT(UPPER(f.drv_event_type), '_UNKNOWN'))
END = 'COMMODITY-FORWARD'
)
); {code}
was:
During many-to-many joins, if {{other join predicates}} containing string
functions (e.g., {{{}UPPER(){}}}) evaluate to {{{}FALSE{}}}, the output batch
never reaches capacity. This traps execution inside the {{ProcessProbeBatch}}
inner loop, preventing the return to {{GetNext()}} where temporary expression
memory is normally freed. Consequently, temporary expression memory grows
infinitely without garbage collection until the {{impalad}} process crashes
from OOM.
Repro Steps:
{code:java}
CREATE TABLE test_left_fact (
product_sk BIGINT,
drv_event_type STRING
);
CREATE TABLE test_right_cte (
product_sk BIGINT,
product_type STRING
);
-- Create a temporary view of 10 numbers
WITH ten AS (
SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION
ALL
SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL
SELECT 8 UNION ALL SELECT 9
)
-- 1. Insert 1,024 rows into the Left Fact table (Exactly 1 Impala Batch)
-- All rows share the same join key (999)
INSERT INTO test_left_fact
SELECT
999 AS product_sk,
'fail_event' AS drv_event_type
FROM ten a CROSS JOIN ten b CROSS JOIN ten c
LIMIT 1024;
WITH ten AS (
SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION
ALL
SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL
SELECT 8 UNION ALL SELECT 9
)-- Insert 1,000,000 rows into the Right CTE table
-- All 1 Million rows share the exact same join key (999)
INSERT INTO test_right_cte
SELECT
999 AS product_sk,
'wrong_type' AS product_type
FROM ten a
CROSS JOIN ten b
CROSS JOIN ten c
CROSS JOIN ten d
CROSS JOIN ten e
CROSS JOIN ten f;
select count(*) from test_right_cte;SELECT STRAIGHT_JOIN count(1)
FROM test_left_fact f
LEFT JOIN test_right_cte p2
ON f.product_sk = p2.product_sk
WHERE (
-- Condition 1: Evaluates to FALSE
UPPER(f.drv_event_type) LIKE '%_CONF'
OR
-- Condition 2: The Memory Trap
-- Because Condition 1 is FALSE, Impala MUST evaluate this entire block for
all 100,000 matches.
-- Every UPPER() and CONCAT() asks the OS for fresh memory.
(
CASE
WHEN UPPER(p2.product_type) = 'COMMODITY-SWAP'
THEN UPPER(CONCAT(f.drv_event_type, '_SWAP_1'))
WHEN UPPER(p2.product_type) = 'METAL-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_METAL_2'))
WHEN UPPER(p2.product_type) = 'ENERGY-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_ENERGY_3'))
WHEN UPPER(p2.product_type) = 'AGRICULTURAL-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_AGRI_4'))
WHEN UPPER(p2.product_type) = 'SOFT-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_SOFT_5'))
WHEN UPPER(p2.product_type) = 'OTHER-FUTURE'
THEN UPPER(CONCAT(f.drv_event_type, '_OTHER_6'))
WHEN UPPER(p2.product_type) = 'METAL-OPTION'
THEN UPPER(CONCAT(f.drv_event_type, '_MOPT_7'))
WHEN UPPER(p2.product_type) = 'ENERGY-OPTION'
THEN UPPER(CONCAT(f.drv_event_type, '_EOPT_8'))
WHEN UPPER(p2.product_type) = 'SPREAD-OPTION'
THEN UPPER(CONCAT(f.drv_event_type, '_SOPT_9'))
WHEN UPPER(p2.product_type) = 'COMMODITY-INDEX'
THEN UPPER(CONCAT(f.drv_event_type, '_INDEX_10'))
-- The fallback generates two more uppercase string allocations
ELSE UPPER(CONCAT(UPPER(f.drv_event_type), '_UNKNOWN'))
END = 'COMMODITY-FORWARD'
)
); {code}
> OOM in HashJoinNode due to unbounded Exprs pool allocations
> -----------------------------------------------------------
>
> Key: IMPALA-14863
> URL: https://issues.apache.org/jira/browse/IMPALA-14863
> Project: IMPALA
> Issue Type: Bug
> Components: Backend
> Reporter: Abhishek Rawat
> Priority: Critical
>
> During many-to-many joins, if {{other join predicates}} containing string
> functions (e.g., {{{}UPPER(){}}}) evaluate to {{{}FALSE{}}}, the output batch
> never reaches capacity. This traps execution inside the {{ProcessProbeBatch}}
> inner loop, preventing the return to {{GetNext()}} where temporary expression
> memory is normally freed. Consequently, temporary expression memory grows
> infinitely without garbage collection until the {{impalad}} process crashes
> from OOM.
>
> Repro Steps:
> {code:java}
> CREATE TABLE test_left_fact (
> product_sk BIGINT,
> drv_event_type STRING
> );
> CREATE TABLE test_right_cte (
> product_sk BIGINT,
> product_type STRING
> );
> -- Create a temporary view of 10 numbers
> WITH ten AS (
> SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
> UNION ALL
> SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL
> SELECT 8 UNION ALL SELECT 9
> )
> -- 1. Insert 1,024 rows into the Left Fact table (Exactly 1 Impala Batch)
> -- All rows share the same join key (999)
> INSERT INTO test_left_fact
> SELECT
> 999 AS product_sk,
> 'fail_event' AS drv_event_type
> FROM ten a CROSS JOIN ten b CROSS JOIN ten c
> LIMIT 1024;
> -- 2. Insert 1,000,000 rows into the Right CTE table
> -- All 1 Million rows share the exact same join key (999)
> WITH ten AS (
> SELECT 0 AS n UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
> UNION ALL
> SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL
> SELECT 8 UNION ALL SELECT 9
> )
> INSERT INTO test_right_cte
> SELECT
> 999 AS product_sk,
> 'wrong_type' AS product_type
> FROM ten a
> CROSS JOIN ten b
> CROSS JOIN ten c
> CROSS JOIN ten d
> CROSS JOIN ten e
> CROSS JOIN ten f;
> -- OOM Query
> SELECT STRAIGHT_JOIN count(1)
> FROM test_left_fact f
> LEFT JOIN test_right_cte p2
> ON f.product_sk = p2.product_sk
> WHERE (
> -- Condition 1: Evaluates to FALSE
> UPPER(f.drv_event_type) LIKE '%_CONF'
> OR
> -- Condition 2: The Memory Trap
> -- Because Condition 1 is FALSE, Impala MUST evaluate this entire block for
> all 1000,000 matches.
> -- Every UPPER() and CONCAT() asks the OS for fresh memory.
> (
> CASE
> WHEN UPPER(p2.product_type) = 'COMMODITY-SWAP'
> THEN UPPER(CONCAT(f.drv_event_type, '_SWAP_1'))
>
> WHEN UPPER(p2.product_type) = 'METAL-FUTURE'
> THEN UPPER(CONCAT(f.drv_event_type, '_METAL_2'))
>
> WHEN UPPER(p2.product_type) = 'ENERGY-FUTURE'
> THEN UPPER(CONCAT(f.drv_event_type, '_ENERGY_3'))
>
> WHEN UPPER(p2.product_type) = 'AGRICULTURAL-FUTURE'
> THEN UPPER(CONCAT(f.drv_event_type, '_AGRI_4'))
>
> WHEN UPPER(p2.product_type) = 'SOFT-FUTURE'
> THEN UPPER(CONCAT(f.drv_event_type, '_SOFT_5'))
>
> WHEN UPPER(p2.product_type) = 'OTHER-FUTURE'
> THEN UPPER(CONCAT(f.drv_event_type, '_OTHER_6'))
>
> WHEN UPPER(p2.product_type) = 'METAL-OPTION'
> THEN UPPER(CONCAT(f.drv_event_type, '_MOPT_7'))
>
> WHEN UPPER(p2.product_type) = 'ENERGY-OPTION'
> THEN UPPER(CONCAT(f.drv_event_type, '_EOPT_8'))
>
> WHEN UPPER(p2.product_type) = 'SPREAD-OPTION'
> THEN UPPER(CONCAT(f.drv_event_type, '_SOPT_9'))
>
> WHEN UPPER(p2.product_type) = 'COMMODITY-INDEX'
> THEN UPPER(CONCAT(f.drv_event_type, '_INDEX_10'))
>
> -- The fallback generates two more uppercase string allocations
> ELSE UPPER(CONCAT(UPPER(f.drv_event_type), '_UNKNOWN'))
>
> END = 'COMMODITY-FORWARD'
> )
> ); {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]