[
https://issues.apache.org/jira/browse/HIVE-29539?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
asish kumar patra updated HIVE-29539:
-------------------------------------
Description:
Below is the testcase:
CREATE TABLE `fb00_xdn019_ref_frr_commune_test`(
`kc_code` string COMMENT 'Code INSEE de la commune',
`dc_niveaufrr` string COMMENT 'Niveau de classement FRR de la commune',
`filename` string COMMENT 'Nom du fichier source',
`date_import_raw` timestamp COMMENT 'Date d\'ajout dans le niveau RAW',
`date_insertion_clean` timestamp COMMENT 'Date d\'ajout dans le niveau
CLEAN')
CLUSTERED BY (
`kc_code`)
INTO 1 BUCKETS
;
CREATE EXTERNAL TABLE default.`fb00_xdn019_ref_frr_commune`(
`donneesfrr` array<struct<`code`:string, `niveaufrr`:string>> COMMENT 'from
deserializer')
COMMENT 'R▒f▒rentiel de classement FRR des communes'
PARTITIONED BY (
`yearbrut` string,
`monthbrut` string,
`daybrut` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.JsonSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
;
CREATE VIEW default.fb00_xdn019_ref_frr_commune_vue AS SELECT `_c0` AS
`kc_code`, `_c1` AS `dc_niveaufrr`, `yearbrut` AS `yearbrut`, `monthbrut` AS
`monthbrut`, `daybrut` AS `daybrut`, `input__file__name` AS `filepath`, `_c6`
AS `fichier`, `block__offset__inside__file` AS `posdansfichier`, `_c8` AS
`date_import_raw` FROM (SELECT
NVL(`donneesfrr`.`frr`.`code`,null),
NVL(`donneesfrr`.`frr`.`niveauFrr`,null),
`src`.`yearbrut`,
`src`.`monthbrut`,
`src`.`daybrut`,
`src`.`input__file__name`,
REGEXP_EXTRACT(`src`.`input__file__name`,'([^/]+)(?!.*/)',1),
`src`.`block__offset__inside__file`,
cast(regexp_replace(`src`.`input__file__name`,".*.(\\d\{4})(\\d\{2})(\\d\{2})_(\\d\{2})(\\d\{2})(\\d\{2})(\\d+)*(\\D.*)*","$1-$2-$3
$4:$5:$6.$7") AS Timestamp)
FROM ( SELECT `fb00_xdn019_ref_frr_commune`.`donneesfrr`,
`fb00_xdn019_ref_frr_commune`.`yearbrut`,
`fb00_xdn019_ref_frr_commune`.`monthbrut`,
`fb00_xdn019_ref_frr_commune`.`daybrut`,
`fb00_xdn019_ref_frr_commune`.`input__file__name`,
`fb00_xdn019_ref_frr_commune`.`block__offset__inside__file` FROM
`default`.`fb00_xdn019_ref_frr_commune`
where `fb00_xdn019_ref_frr_commune`.`yearbrut` = '0000' and
`fb00_xdn019_ref_frr_commune`.`monthbrut` = '00' and
`fb00_xdn019_ref_frr_commune`.`daybrut` = '01') `src`
LATERAL VIEW OUTER explode(`src`.`donneesfrr`) `donneesFrr` AS `frr`)
`fb00_xdn019_ref_frr_commune_vue`
;
select *
FROM default.fb00_xdn019_ref_frr_commune_test
WHERE concat(CASE WHEN kc_code IS NULL THEN "" ELSE TRIM(CAST(kc_code AS
STRING)) END)
IN (
SELECT concat(CASE WHEN kc_code IS NULL THEN "" ELSE
TRIM(CAST(kc_code AS STRING)) END)
FROM default.fb00_xdn019_ref_frr_commune_vue
WHERE (yearbrut='0000' AND monthbrut='00' AND daybrut='01') AND 1=1
)
;
Error: Error while compiling statement: FAILED:
CalciteSubquerySemanticException CBO can not handle Sub Query; Query ID:
akpatra_20260401034113_a5b5712e-b069-4024-bbc5-3a5b923425be
(state=42000,code=40000)
0: jdbc:hive2://localhost:10000/>
Setting below fixes the issue.
set hive.cbo.fallback.strategy=CONSERVATIVE;
or
set hive.cbo.fallback.strategy=ALWAYS;
We should never use NEVER as this will use legacy optimiser.
HIVE_CBO_FALLBACK_STRATEGY("hive.cbo.fallback.strategy", "NEVER", new
StringSet(true, "NEVER", "CONSERVATIVE", "ALWAYS"), "The strategy defines when
Hive fallbacks to legacy optimizer when CBO fails:" + "NEVER, never use the
legacy optimizer (all CBO errors are fatal);" + "ALWAYS, always use the legacy
optimizer (CBO errors are not fatal);" + "CONSERVATIVE, use the legacy
optimizer only when the CBO error is not related to
I believe this was introduced because of below jira's:
https://issues.apache.org/jira/browse/HIVE-24601
https://issues.apache.org/jira/browse/HIVE-27831
was:
Below is the testcase:
CREATE TABLE `fb00_xdn019_ref_frr_commune_test`(
`kc_code` string COMMENT 'Code INSEE de la commune',
`dc_niveaufrr` string COMMENT 'Niveau de classement FRR de la commune',
`filename` string COMMENT 'Nom du fichier source',
`date_import_raw` timestamp COMMENT 'Date d\'ajout dans le niveau RAW',
`date_insertion_clean` timestamp COMMENT 'Date d\'ajout dans le niveau
CLEAN')
CLUSTERED BY (
`kc_code`)
INTO 1 BUCKETS
;
CREATE EXTERNAL TABLE default.`fb00_xdn019_ref_frr_commune`(
`donneesfrr` array<struct<`code`:string, `niveaufrr`:string>> COMMENT 'from
deserializer')
COMMENT 'R▒f▒rentiel de classement FRR des communes'
PARTITIONED BY (
`yearbrut` string,
`monthbrut` string,
`daybrut` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.JsonSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
;
CREATE VIEW default.fb00_xdn019_ref_frr_commune_vue AS SELECT `_c0` AS
`kc_code`, `_c1` AS `dc_niveaufrr`, `yearbrut` AS `yearbrut`, `monthbrut` AS
`monthbrut`, `daybrut` AS `daybrut`, `input__file__name` AS `filepath`, `_c6`
AS `fichier`, `block__offset__inside__file` AS `posdansfichier`, `_c8` AS
`date_import_raw` FROM (SELECT
NVL(`donneesfrr`.`frr`.`code`,null),
NVL(`donneesfrr`.`frr`.`niveauFrr`,null),
`src`.`yearbrut`,
`src`.`monthbrut`,
`src`.`daybrut`,
`src`.`input__file__name`,
REGEXP_EXTRACT(`src`.`input__file__name`,'([^/]+)(?!.*/)',1),
`src`.`block__offset__inside__file`,
cast(regexp_replace(`src`.`input__file__name`,".*.(\\d\{4})(\\d\{2})(\\d\{2})_(\\d\{2})(\\d\{2})(\\d\{2})(\\d+)*(\\D.*)*","$1-$2-$3
$4:$5:$6.$7") AS Timestamp)
FROM ( SELECT `fb00_xdn019_ref_frr_commune`.`donneesfrr`,
`fb00_xdn019_ref_frr_commune`.`yearbrut`,
`fb00_xdn019_ref_frr_commune`.`monthbrut`,
`fb00_xdn019_ref_frr_commune`.`daybrut`,
`fb00_xdn019_ref_frr_commune`.`input__file__name`,
`fb00_xdn019_ref_frr_commune`.`block__offset__inside__file` FROM
`default`.`fb00_xdn019_ref_frr_commune`
where `fb00_xdn019_ref_frr_commune`.`yearbrut` = '0000' and
`fb00_xdn019_ref_frr_commune`.`monthbrut` = '00' and
`fb00_xdn019_ref_frr_commune`.`daybrut` = '01') `src`
LATERAL VIEW OUTER explode(`src`.`donneesfrr`) `donneesFrr` AS `frr`)
`fb00_xdn019_ref_frr_commune_vue`
;
select *
FROM default.fb00_xdn019_ref_frr_commune_test
WHERE concat(CASE WHEN kc_code IS NULL THEN "" ELSE TRIM(CAST(kc_code AS
STRING)) END)
IN (
SELECT concat(CASE WHEN kc_code IS NULL THEN "" ELSE
TRIM(CAST(kc_code AS STRING)) END)
FROM default.fb00_xdn019_ref_frr_commune_vue
WHERE (yearbrut='0000' AND monthbrut='00' AND daybrut='01') AND 1=1
)
;
Error: Error while compiling statement: FAILED:
CalciteSubquerySemanticException CBO can not handle Sub Query; Query ID:
akpatra_20260401034113_a5b5712e-b069-4024-bbc5-3a5b923425be
(state=42000,code=40000)
0: jdbc:hive2://localhost:10000/>
Setting below fixes the issue.
set hive.cbo.fallback.strategy=CONSERVATIVE;
or
set hive.cbo.fallback.strategy=ALWAYS;
We should never use NEVER as this will use legacy optimiser.
HIVE_CBO_FALLBACK_STRATEGY("hive.cbo.fallback.strategy", "NEVER", new
StringSet(true, "NEVER", "CONSERVATIVE", "ALWAYS"), "The strategy defines when
Hive fallbacks to legacy optimizer when CBO fails:" + "NEVER, never use the
legacy optimizer (all CBO errors are fatal);" + "ALWAYS, always use the legacy
optimizer (CBO errors are not fatal);" + "CONSERVATIVE, use the legacy
optimizer only when the CBO error is not related to
I believe this was introduced because of below jira's:
https://issues.apache.org/jira/browse/HIVE-24601
https://issues.apache.org/jira/browse/HIVE-27831
> Query Fails with "CalciteSubquerySemanticException CBO can not handle Sub
> Query"
> --------------------------------------------------------------------------------
>
> Key: HIVE-29539
> URL: https://issues.apache.org/jira/browse/HIVE-29539
> Project: Hive
> Issue Type: Bug
> Components: CBO
> Reporter: asish kumar patra
> Priority: Major
> Attachments: reproducer.hql
>
>
> Below is the testcase:
>
>
> CREATE TABLE `fb00_xdn019_ref_frr_commune_test`(
> `kc_code` string COMMENT 'Code INSEE de la commune',
> `dc_niveaufrr` string COMMENT 'Niveau de classement FRR de la commune',
> `filename` string COMMENT 'Nom du fichier source',
> `date_import_raw` timestamp COMMENT 'Date d\'ajout dans le niveau RAW',
> `date_insertion_clean` timestamp COMMENT 'Date d\'ajout dans le niveau
> CLEAN')
> CLUSTERED BY (
> `kc_code`)
> INTO 1 BUCKETS
> ;
> CREATE EXTERNAL TABLE default.`fb00_xdn019_ref_frr_commune`(
> `donneesfrr` array<struct<`code`:string, `niveaufrr`:string>> COMMENT
> 'from deserializer')
> COMMENT 'R▒f▒rentiel de classement FRR des communes'
> PARTITIONED BY (
> `yearbrut` string,
> `monthbrut` string,
> `daybrut` string)
> ROW FORMAT SERDE
> 'org.apache.hadoop.hive.serde2.JsonSerDe'
> STORED AS INPUTFORMAT
> 'org.apache.hadoop.mapred.TextInputFormat'
> OUTPUTFORMAT
> 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
> ;
>
> CREATE VIEW default.fb00_xdn019_ref_frr_commune_vue AS SELECT `_c0` AS
> `kc_code`, `_c1` AS `dc_niveaufrr`, `yearbrut` AS `yearbrut`, `monthbrut` AS
> `monthbrut`, `daybrut` AS `daybrut`, `input__file__name` AS `filepath`, `_c6`
> AS `fichier`, `block__offset__inside__file` AS `posdansfichier`, `_c8` AS
> `date_import_raw` FROM (SELECT
> NVL(`donneesfrr`.`frr`.`code`,null),
> NVL(`donneesfrr`.`frr`.`niveauFrr`,null),
> `src`.`yearbrut`,
> `src`.`monthbrut`,
> `src`.`daybrut`,
> `src`.`input__file__name`,
> REGEXP_EXTRACT(`src`.`input__file__name`,'([^/]+)(?!.*/)',1),
> `src`.`block__offset__inside__file`,
>
> cast(regexp_replace(`src`.`input__file__name`,".*.(\\d\{4})(\\d\{2})(\\d\{2})_(\\d\{2})(\\d\{2})(\\d\{2})(\\d+)*(\\D.*)*","$1-$2-$3
> $4:$5:$6.$7") AS Timestamp)
> FROM ( SELECT `fb00_xdn019_ref_frr_commune`.`donneesfrr`,
> `fb00_xdn019_ref_frr_commune`.`yearbrut`,
> `fb00_xdn019_ref_frr_commune`.`monthbrut`,
> `fb00_xdn019_ref_frr_commune`.`daybrut`,
> `fb00_xdn019_ref_frr_commune`.`input__file__name`,
> `fb00_xdn019_ref_frr_commune`.`block__offset__inside__file` FROM
> `default`.`fb00_xdn019_ref_frr_commune`
> where `fb00_xdn019_ref_frr_commune`.`yearbrut` = '0000' and
> `fb00_xdn019_ref_frr_commune`.`monthbrut` = '00' and
> `fb00_xdn019_ref_frr_commune`.`daybrut` = '01') `src`
> LATERAL VIEW OUTER explode(`src`.`donneesfrr`) `donneesFrr` AS `frr`)
> `fb00_xdn019_ref_frr_commune_vue`
> ;
> select *
> FROM default.fb00_xdn019_ref_frr_commune_test
> WHERE concat(CASE WHEN kc_code IS NULL THEN "" ELSE TRIM(CAST(kc_code AS
> STRING)) END)
> IN (
> SELECT concat(CASE WHEN kc_code IS NULL THEN "" ELSE
> TRIM(CAST(kc_code AS STRING)) END)
> FROM default.fb00_xdn019_ref_frr_commune_vue
> WHERE (yearbrut='0000' AND monthbrut='00' AND daybrut='01') AND
> 1=1
> )
> ;
>
>
> Error: Error while compiling statement: FAILED:
> CalciteSubquerySemanticException CBO can not handle Sub Query; Query ID:
> akpatra_20260401034113_a5b5712e-b069-4024-bbc5-3a5b923425be
> (state=42000,code=40000)
> 0: jdbc:hive2://localhost:10000/>
>
> Setting below fixes the issue.
> set hive.cbo.fallback.strategy=CONSERVATIVE;
> or
> set hive.cbo.fallback.strategy=ALWAYS;
> We should never use NEVER as this will use legacy optimiser.
> HIVE_CBO_FALLBACK_STRATEGY("hive.cbo.fallback.strategy", "NEVER", new
> StringSet(true, "NEVER", "CONSERVATIVE", "ALWAYS"), "The strategy defines
> when Hive fallbacks to legacy optimizer when CBO fails:" + "NEVER, never use
> the legacy optimizer (all CBO errors are fatal);" + "ALWAYS, always use the
> legacy optimizer (CBO errors are not fatal);" + "CONSERVATIVE, use the legacy
> optimizer only when the CBO error is not related to
> I believe this was introduced because of below jira's:
> https://issues.apache.org/jira/browse/HIVE-24601
> https://issues.apache.org/jira/browse/HIVE-27831
--
This message was sent by Atlassian Jira
(v8.20.10#820010)