[ 
https://issues.apache.org/jira/browse/HIVE-22893?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17037696#comment-17037696
 ] 

Hive QA commented on HIVE-22893:
--------------------------------



Here are the results of testing the latest attachment:
https://issues.apache.org/jira/secure/attachment/12993510/HIVE-22893.01.patch

{color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified.

{color:red}ERROR:{color} -1 due to 116 failed/errored test(s), 17994 tests executed
*Failed tests:*
{noformat}
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cbo_rp_gby2_map_multi_distinct]
 (batchId=95)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[cbo_rp_groupby3_noskew_multi_distinct]
 (batchId=45)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[column_pruner_multiple_children]
 (batchId=25)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[constprog_when_case] 
(batchId=66)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[count_dist_rewrite] 
(batchId=82)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[create_genericudaf] 
(batchId=94)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby11] (batchId=83)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1] (batchId=21)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_limit] 
(batchId=24)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_map] 
(batchId=81)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_map_nomap] 
(batchId=89)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_map_skew] 
(batchId=72)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby1_noskew] 
(batchId=38)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_limit] 
(batchId=10)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_map] 
(batchId=32)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_map_multi_distinct]
 (batchId=43)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_map_skew] 
(batchId=97)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_noskew] 
(batchId=2)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby2_noskew_multi_distinct]
 (batchId=96)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_map] 
(batchId=77)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_map_multi_distinct]
 (batchId=36)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_map_skew] 
(batchId=67)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_noskew] 
(batchId=92)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby3_noskew_multi_distinct]
 (batchId=71)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby4] (batchId=70)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby4_noskew] 
(batchId=69)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby5] (batchId=46)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby5_noskew] 
(batchId=99)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby6] (batchId=62)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby6_map] 
(batchId=93)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby6_map_skew] 
(batchId=47)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby6_noskew] 
(batchId=2)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_map] (batchId=5)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_map_multi_single_reducer]
 (batchId=6)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_map_skew] 
(batchId=50)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_noskew] 
(batchId=98)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby7_noskew_multi_single_reducer]
 (batchId=68)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby8_map_skew] 
(batchId=57)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby9] (batchId=7)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_map_ppr] 
(batchId=9)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_map_ppr_multi_distinct]
 (batchId=58)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_multi_single_reducer2]
 (batchId=22)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_multi_single_reducer]
 (batchId=70)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_nocolumnalign] 
(batchId=63)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_position] 
(batchId=45)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_ppr] (batchId=34)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[groupby_ppr_multi_distinct]
 (batchId=66)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[infer_bucket_sort_dyn_part]
 (batchId=42)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[list_bucket_dml_6] 
(batchId=36)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[list_bucket_dml_7] 
(batchId=63)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[list_bucket_dml_8] 
(batchId=83)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[merge_dynamic_partition4]
 (batchId=39)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[merge_dynamic_partition5]
 (batchId=38)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[nullgroup4_multi_distinct]
 (batchId=12)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[offset_limit_global_optimizer]
 (batchId=22)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[union17] (batchId=82)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[unionall_unbalancedppd] 
(batchId=3)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vector_case_when_1] 
(batchId=97)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vector_case_when_2] 
(batchId=61)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vector_groupby4] 
(batchId=17)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vector_groupby6] 
(batchId=101)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vector_if_expr] 
(batchId=12)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vector_nvl] (batchId=83)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vectorization_multi_value]
 (batchId=84)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vectorized_string_funcs] 
(batchId=66)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[vectorized_timestamp_funcs]
 (batchId=35)
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[w1] (batchId=21)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_avg] 
(batchId=299)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_group_concat]
 (batchId=299)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_max] 
(batchId=299)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_max_n]
 (batchId=299)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_min] 
(batchId=299)
org.apache.hadoop.hive.cli.TestContribCliDriver.testCliDriver[udaf_example_min_n]
 (batchId=299)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[explainuser_2] 
(batchId=163)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[unionDistinct_1] 
(batchId=161)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_rp_windowing_2]
 (batchId=172)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[count_dist_rewrite]
 (batchId=186)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[explainanalyze_2]
 (batchId=185)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[groupby1] 
(batchId=169)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[groupby2] 
(batchId=179)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[lineage2] 
(batchId=181)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[offset_limit]
 (batchId=176)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[tez_union_multiinsert]
 (batchId=175)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_case_when_1]
 (batchId=190)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_case_when_2]
 (batchId=181)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_groupby4]
 (batchId=169)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_groupby6]
 (batchId=191)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_if_expr]
 (batchId=167)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_nvl] 
(batchId=186)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_udf1]
 (batchId=182)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_windowing]
 (batchId=191)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_string_funcs]
 (batchId=182)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vectorized_timestamp_funcs]
 (batchId=173)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[windowing] 
(batchId=181)
org.apache.hadoop.hive.cli.TestMiniSparkOnYarnCliDriver.testCliDriver[dynamic_rdd_cache]
 (batchId=199)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby3_map_skew] 
(batchId=144)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby7_map] 
(batchId=119)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby7_map_skew] 
(batchId=137)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby8_map_skew] 
(batchId=140)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby9] 
(batchId=120)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[groupby_position] 
(batchId=135)
org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[union17] 
(batchId=150)
org.apache.hadoop.hive.cli.TestTezPerfCliDriver.testCliDriver[query19] 
(batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfCliDriver.testCliDriver[query23] 
(batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfCliDriver.testCliDriver[query79] 
(batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfCliDriver.testCliDriver[query85] 
(batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfCliDriver.testCliDriver[query8] 
(batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfCliDriver.testCliDriver[query99] 
(batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfConstraintsCliDriver.testCliDriver[cbo_query23]
 (batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfConstraintsCliDriver.testCliDriver[query19]
 (batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfConstraintsCliDriver.testCliDriver[query23]
 (batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfConstraintsCliDriver.testCliDriver[query79]
 (batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfConstraintsCliDriver.testCliDriver[query85]
 (batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfConstraintsCliDriver.testCliDriver[query8]
 (batchId=304)
org.apache.hadoop.hive.cli.TestTezPerfConstraintsCliDriver.testCliDriver[query99]
 (batchId=304)
org.apache.hadoop.hive.ql.TestMTQueries.testMTQueries1 (batchId=279)
{noformat}

Test results: 
https://builds.apache.org/job/PreCommit-HIVE-Build/20659/testReport
Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/20659/console
Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-20659/

Messages:
{noformat}
Executing org.apache.hive.ptest.execution.TestCheckPhase
Executing org.apache.hive.ptest.execution.PrepPhase
Executing org.apache.hive.ptest.execution.YetusPhase
Executing org.apache.hive.ptest.execution.ExecutionPhase
Executing org.apache.hive.ptest.execution.ReportingPhase
Tests exited with: TestsFailedException: 116 tests failed
{noformat}

This message is automatically generated.

ATTACHMENT ID: 12993510 - PreCommit-HIVE-Build

> Enhance data size estimation for fields computed by UDFs
> --------------------------------------------------------
>
>                 Key: HIVE-22893
>                 URL: https://issues.apache.org/jira/browse/HIVE-22893
>             Project: Hive
>          Issue Type: Improvement
>          Components: Statistics
>            Reporter: Zoltan Haindrich
>            Assignee: Zoltan Haindrich
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: HIVE-22893.01.patch
>
>          Time Spent: 10m
>  Remaining Estimate: 0h
>
> Right now, if we have column statistics on a column, we use them to estimate 
> properties of that column; however, if a UDF is executed on a column, the 
> resulting column is treated as unknown and default estimates are assumed.
> An improvement could be to give wide estimation(s) for frequently used 
> UDFs.
> For example, consider {{substr(c,1,1)}}: no matter what the input is, the 
> output is a string of at most 1 character.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to