[ 
https://issues.apache.org/jira/browse/HIVE-29539?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

asish kumar patra updated HIVE-29539:
-------------------------------------
    Description: 
Below is the testcase:

 

 

CREATE TABLE `fb00_xdn019_ref_frr_commune_test`(
   `kc_code` string COMMENT 'Code INSEE de la commune',
   `dc_niveaufrr` string COMMENT 'Niveau de classement FRR de la commune',
   `filename` string COMMENT 'Nom du fichier source',
   `date_import_raw` timestamp COMMENT 'Date d\'ajout dans le niveau RAW',
   `date_insertion_clean` timestamp COMMENT 'Date d\'ajout dans le niveau 
CLEAN')
 CLUSTERED BY (
   `kc_code`)
 INTO 1 BUCKETS
;


CREATE EXTERNAL TABLE default.`fb00_xdn019_ref_frr_commune`(
   `donneesfrr` array<struct<`code`:string, `niveaufrr`:string>> COMMENT 'from 
deserializer')
 COMMENT 'R▒f▒rentiel de classement FRR des communes'
 PARTITIONED BY (
   `yearbrut` string,
   `monthbrut` string,
   `daybrut` string)
 ROW FORMAT SERDE
   'org.apache.hadoop.hive.serde2.JsonSerDe'
 STORED AS INPUTFORMAT
   'org.apache.hadoop.mapred.TextInputFormat'
 OUTPUTFORMAT
   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
;

 


CREATE VIEW default.fb00_xdn019_ref_frr_commune_vue AS SELECT `_c0` AS 
`kc_code`, `_c1` AS `dc_niveaufrr`, `yearbrut` AS `yearbrut`, `monthbrut` AS 
`monthbrut`, `daybrut` AS `daybrut`, `input__file__name` AS `filepath`, `_c6` 
AS `fichier`, `block__offset__inside__file` AS `posdansfichier`, `_c8` AS 
`date_import_raw` FROM (SELECT
 NVL(`donneesfrr`.`frr`.`code`,null),
 NVL(`donneesfrr`.`frr`.`niveauFrr`,null),
 `src`.`yearbrut`,
 `src`.`monthbrut`,
 `src`.`daybrut`,
 `src`.`input__file__name`,
 REGEXP_EXTRACT(`src`.`input__file__name`,'([^/]+)(?!.*/)',1),
 `src`.`block__offset__inside__file`,
 
cast(regexp_replace(`src`.`input__file__name`,".*.(\\d\{4})(\\d\{2})(\\d\{2})_(\\d\{2})(\\d\{2})(\\d\{2})(\\d+)*(\\D.*)*","$1-$2-$3
 $4:$5:$6.$7") AS Timestamp)
 FROM ( SELECT `fb00_xdn019_ref_frr_commune`.`donneesfrr`, 
`fb00_xdn019_ref_frr_commune`.`yearbrut`, 
`fb00_xdn019_ref_frr_commune`.`monthbrut`, 
`fb00_xdn019_ref_frr_commune`.`daybrut`, 
`fb00_xdn019_ref_frr_commune`.`input__file__name`, 
`fb00_xdn019_ref_frr_commune`.`block__offset__inside__file` FROM 
`default`.`fb00_xdn019_ref_frr_commune`
 where `fb00_xdn019_ref_frr_commune`.`yearbrut` = '0000' and 
`fb00_xdn019_ref_frr_commune`.`monthbrut` = '00' and 
`fb00_xdn019_ref_frr_commune`.`daybrut` = '01') `src`
 LATERAL VIEW OUTER explode(`src`.`donneesfrr`) `donneesFrr` AS `frr`) 
`fb00_xdn019_ref_frr_commune_vue`
;

select *
FROM default.fb00_xdn019_ref_frr_commune_test
    WHERE concat(CASE WHEN kc_code IS NULL THEN "" ELSE TRIM(CAST(kc_code AS 
STRING)) END)
        IN (
            SELECT concat(CASE WHEN kc_code IS NULL THEN "" ELSE 
TRIM(CAST(kc_code AS STRING)) END)
            FROM default.fb00_xdn019_ref_frr_commune_vue
            WHERE (yearbrut='0000' AND monthbrut='00' AND daybrut='01') AND 1=1
        )
;

 

 

Error: Error while compiling statement: FAILED: 
CalciteSubquerySemanticException CBO can not handle Sub Query; Query ID: 
akpatra_20260401034113_a5b5712e-b069-4024-bbc5-3a5b923425be 
(state=42000,code=40000)

0: jdbc:hive2://localhost:10000/>

 

Setting below fixes the issue.

set hive.cbo.fallback.strategy=CONSERVATIVE;

or

set hive.cbo.fallback.strategy=ALWAYS;

We should never use NEVER as this will use legacy optimiser.

HIVE_CBO_FALLBACK_STRATEGY("hive.cbo.fallback.strategy", "NEVER", new 
StringSet(true, "NEVER", "CONSERVATIVE", "ALWAYS"), "The strategy defines when 
Hive fallbacks to legacy optimizer when CBO fails:" + "NEVER, never use the 
legacy optimizer (all CBO errors are fatal);" + "ALWAYS, always use the legacy 
optimizer (CBO errors are not fatal);" + "CONSERVATIVE, use the legacy 
optimizer only when the CBO error is not related to

I believe this was introduced  because of below jira's:

https://issues.apache.org/jira/browse/HIVE-24601 
https://issues.apache.org/jira/browse/HIVE-27831

  was:
Below is the testcase:

 

 

CREATE TABLE `fb00_xdn019_ref_frr_commune_test`(
   `kc_code` string COMMENT 'Code INSEE de la commune',
   `dc_niveaufrr` string COMMENT 'Niveau de classement FRR de la commune',
   `filename` string COMMENT 'Nom du fichier source',
   `date_import_raw` timestamp COMMENT 'Date d\'ajout dans le niveau RAW',
   `date_insertion_clean` timestamp COMMENT 'Date d\'ajout dans le niveau 
CLEAN')
 CLUSTERED BY (
   `kc_code`)
 INTO 1 BUCKETS
;


CREATE EXTERNAL TABLE default.`fb00_xdn019_ref_frr_commune`(
   `donneesfrr` array<struct<`code`:string, `niveaufrr`:string>> COMMENT 'from 
deserializer')
 COMMENT 'R▒f▒rentiel de classement FRR des communes'
 PARTITIONED BY (
   `yearbrut` string,
   `monthbrut` string,
   `daybrut` string)
 ROW FORMAT SERDE
   'org.apache.hadoop.hive.serde2.JsonSerDe'
 STORED AS INPUTFORMAT
   'org.apache.hadoop.mapred.TextInputFormat'
 OUTPUTFORMAT
   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
;

 


CREATE VIEW default.fb00_xdn019_ref_frr_commune_vue AS SELECT `_c0` AS 
`kc_code`, `_c1` AS `dc_niveaufrr`, `yearbrut` AS `yearbrut`, `monthbrut` AS 
`monthbrut`, `daybrut` AS `daybrut`, `input__file__name` AS `filepath`, `_c6` 
AS `fichier`, `block__offset__inside__file` AS `posdansfichier`, `_c8` AS 
`date_import_raw` FROM (SELECT
 NVL(`donneesfrr`.`frr`.`code`,null),
 NVL(`donneesfrr`.`frr`.`niveauFrr`,null),
 `src`.`yearbrut`,
 `src`.`monthbrut`,
 `src`.`daybrut`,
 `src`.`input__file__name`,
 REGEXP_EXTRACT(`src`.`input__file__name`,'([^/]+)(?!.*/)',1),
 `src`.`block__offset__inside__file`,
 
cast(regexp_replace(`src`.`input__file__name`,".*.(\\d\{4})(\\d\{2})(\\d\{2})_(\\d\{2})(\\d\{2})(\\d\{2})(\\d+)*(\\D.*)*","$1-$2-$3
 $4:$5:$6.$7") AS Timestamp)
 FROM ( SELECT `fb00_xdn019_ref_frr_commune`.`donneesfrr`, 
`fb00_xdn019_ref_frr_commune`.`yearbrut`, 
`fb00_xdn019_ref_frr_commune`.`monthbrut`, 
`fb00_xdn019_ref_frr_commune`.`daybrut`, 
`fb00_xdn019_ref_frr_commune`.`input__file__name`, 
`fb00_xdn019_ref_frr_commune`.`block__offset__inside__file` FROM 
`default`.`fb00_xdn019_ref_frr_commune`
 where `fb00_xdn019_ref_frr_commune`.`yearbrut` = '0000' and 
`fb00_xdn019_ref_frr_commune`.`monthbrut` = '00' and 
`fb00_xdn019_ref_frr_commune`.`daybrut` = '01') `src`
 LATERAL VIEW OUTER explode(`src`.`donneesfrr`) `donneesFrr` AS `frr`) 
`fb00_xdn019_ref_frr_commune_vue`
;

select *
FROM default.fb00_xdn019_ref_frr_commune_test
    WHERE concat(CASE WHEN kc_code IS NULL THEN "" ELSE TRIM(CAST(kc_code AS 
STRING)) END)
        IN (
            SELECT concat(CASE WHEN kc_code IS NULL THEN "" ELSE 
TRIM(CAST(kc_code AS STRING)) END)
            FROM default.fb00_xdn019_ref_frr_commune_vue
            WHERE (yearbrut='0000' AND monthbrut='00' AND daybrut='01') AND 1=1
        )
;

 

 

Error: Error while compiling statement: FAILED: 
CalciteSubquerySemanticException CBO can not handle Sub Query; Query ID: 
akpatra_20260401034113_a5b5712e-b069-4024-bbc5-3a5b923425be 
(state=42000,code=40000)

0: jdbc:hive2://localhost:10000/> 

 

Setting below fixes the issue.

set hive.cbo.fallback.strategy=CONSERVATIVE;

or

set hive.cbo.fallback.strategy=ALWAYS;

We should never use NEVER as this will use legacy optimiser.

HIVE_CBO_FALLBACK_STRATEGY("hive.cbo.fallback.strategy", "NEVER", new 
StringSet(true, "NEVER", "CONSERVATIVE", "ALWAYS"), "The strategy defines when 
Hive fallbacks to legacy optimizer when CBO fails:" + "NEVER, never use the 
legacy optimizer (all CBO errors are fatal);" + "ALWAYS, always use the legacy 
optimizer (CBO errors are not fatal);" + "CONSERVATIVE, use the legacy 
optimizer only when the CBO error is not related to


I believe this was introduced  because of below jira's:

https://issues.apache.org/jira/browse/HIVE-24601 
https://issues.apache.org/jira/browse/HIVE-27831


> Query Fails with "CalciteSubquerySemanticException CBO can not handle Sub 
> Query"
> --------------------------------------------------------------------------------
>
>                 Key: HIVE-29539
>                 URL: https://issues.apache.org/jira/browse/HIVE-29539
>             Project: Hive
>          Issue Type: Bug
>          Components: CBO
>            Reporter: asish kumar patra
>            Priority: Major
>         Attachments: reproducer.hql
>
>
> Below is the testcase:
>  
>  
> CREATE TABLE `fb00_xdn019_ref_frr_commune_test`(
>    `kc_code` string COMMENT 'Code INSEE de la commune',
>    `dc_niveaufrr` string COMMENT 'Niveau de classement FRR de la commune',
>    `filename` string COMMENT 'Nom du fichier source',
>    `date_import_raw` timestamp COMMENT 'Date d\'ajout dans le niveau RAW',
>    `date_insertion_clean` timestamp COMMENT 'Date d\'ajout dans le niveau 
> CLEAN')
>  CLUSTERED BY (
>    `kc_code`)
>  INTO 1 BUCKETS
> ;
> CREATE EXTERNAL TABLE default.`fb00_xdn019_ref_frr_commune`(
>    `donneesfrr` array<struct<`code`:string, `niveaufrr`:string>> COMMENT 
> 'from deserializer')
>  COMMENT 'R▒f▒rentiel de classement FRR des communes'
>  PARTITIONED BY (
>    `yearbrut` string,
>    `monthbrut` string,
>    `daybrut` string)
>  ROW FORMAT SERDE
>    'org.apache.hadoop.hive.serde2.JsonSerDe'
>  STORED AS INPUTFORMAT
>    'org.apache.hadoop.mapred.TextInputFormat'
>  OUTPUTFORMAT
>    'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
> ;
>  
> CREATE VIEW default.fb00_xdn019_ref_frr_commune_vue AS SELECT `_c0` AS 
> `kc_code`, `_c1` AS `dc_niveaufrr`, `yearbrut` AS `yearbrut`, `monthbrut` AS 
> `monthbrut`, `daybrut` AS `daybrut`, `input__file__name` AS `filepath`, `_c6` 
> AS `fichier`, `block__offset__inside__file` AS `posdansfichier`, `_c8` AS 
> `date_import_raw` FROM (SELECT
>  NVL(`donneesfrr`.`frr`.`code`,null),
>  NVL(`donneesfrr`.`frr`.`niveauFrr`,null),
>  `src`.`yearbrut`,
>  `src`.`monthbrut`,
>  `src`.`daybrut`,
>  `src`.`input__file__name`,
>  REGEXP_EXTRACT(`src`.`input__file__name`,'([^/]+)(?!.*/)',1),
>  `src`.`block__offset__inside__file`,
>  
> cast(regexp_replace(`src`.`input__file__name`,".*.(\\d\{4})(\\d\{2})(\\d\{2})_(\\d\{2})(\\d\{2})(\\d\{2})(\\d+)*(\\D.*)*","$1-$2-$3
>  $4:$5:$6.$7") AS Timestamp)
>  FROM ( SELECT `fb00_xdn019_ref_frr_commune`.`donneesfrr`, 
> `fb00_xdn019_ref_frr_commune`.`yearbrut`, 
> `fb00_xdn019_ref_frr_commune`.`monthbrut`, 
> `fb00_xdn019_ref_frr_commune`.`daybrut`, 
> `fb00_xdn019_ref_frr_commune`.`input__file__name`, 
> `fb00_xdn019_ref_frr_commune`.`block__offset__inside__file` FROM 
> `default`.`fb00_xdn019_ref_frr_commune`
>  where `fb00_xdn019_ref_frr_commune`.`yearbrut` = '0000' and 
> `fb00_xdn019_ref_frr_commune`.`monthbrut` = '00' and 
> `fb00_xdn019_ref_frr_commune`.`daybrut` = '01') `src`
>  LATERAL VIEW OUTER explode(`src`.`donneesfrr`) `donneesFrr` AS `frr`) 
> `fb00_xdn019_ref_frr_commune_vue`
> ;
> select *
> FROM default.fb00_xdn019_ref_frr_commune_test
>     WHERE concat(CASE WHEN kc_code IS NULL THEN "" ELSE TRIM(CAST(kc_code AS 
> STRING)) END)
>         IN (
>             SELECT concat(CASE WHEN kc_code IS NULL THEN "" ELSE 
> TRIM(CAST(kc_code AS STRING)) END)
>             FROM default.fb00_xdn019_ref_frr_commune_vue
>             WHERE (yearbrut='0000' AND monthbrut='00' AND daybrut='01') AND 
> 1=1
>         )
> ;
>  
>  
> Error: Error while compiling statement: FAILED: 
> CalciteSubquerySemanticException CBO can not handle Sub Query; Query ID: 
> akpatra_20260401034113_a5b5712e-b069-4024-bbc5-3a5b923425be 
> (state=42000,code=40000)
> 0: jdbc:hive2://localhost:10000/>
>  
> Setting below fixes the issue.
> set hive.cbo.fallback.strategy=CONSERVATIVE;
> or
> set hive.cbo.fallback.strategy=ALWAYS;
> We should never use NEVER as this will use legacy optimiser.
> HIVE_CBO_FALLBACK_STRATEGY("hive.cbo.fallback.strategy", "NEVER", new 
> StringSet(true, "NEVER", "CONSERVATIVE", "ALWAYS"), "The strategy defines 
> when Hive fallbacks to legacy optimizer when CBO fails:" + "NEVER, never use 
> the legacy optimizer (all CBO errors are fatal);" + "ALWAYS, always use the 
> legacy optimizer (CBO errors are not fatal);" + "CONSERVATIVE, use the legacy 
> optimizer only when the CBO error is not related to
> I believe this was introduced  because of below jira's:
> https://issues.apache.org/jira/browse/HIVE-24601 
> https://issues.apache.org/jira/browse/HIVE-27831



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to