HIVE-14362: Support explain analyze in Hive (Pengcheng Xiong, reviewed by Ashutosh Chauhan, Gopal V and Gabor Szadovszky)
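The new EXPLAIN ANALYZE mode executes the query and then renders the plan with the row counts actually observed at run time (tracked via the new runTimeNumRows statistic) next to the compile-time estimates, so estimation errors are visible per operator. A minimal usage sketch, using the src table from the q-file test suite; the comment describes the behavior only, and the exact plan rendering is not copied from this patch:

    -- runs the query, then prints the plan with each operator annotated
    -- with actual vs. estimated row counts
    EXPLAIN ANALYZE
    SELECT key, count(*) FROM src GROUP BY key;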
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/831bd7d8 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/831bd7d8 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/831bd7d8 Branch: refs/heads/branch-2.2 Commit: 831bd7d866e2a9177e89e3a1be01c8b175f29f2e Parents: b87d460 Author: Pengcheng Xiong <pxi...@apache.org> Authored: Tue Aug 30 13:43:25 2016 -0700 Committer: Owen O'Malley <omal...@apache.org> Committed: Tue Mar 28 14:02:42 2017 -0700 ---------------------------------------------------------------------- .../hadoop/hive/common/StatsSetupConst.java | 2 + .../test/resources/testconfiguration.properties | 5 + .../resources/testconfiguration.properties.orig | 1373 ++ .../java/org/apache/hadoop/hive/ql/Context.java | 61 +- .../org/apache/hadoop/hive/ql/Context.java.orig | 829 ++ .../java/org/apache/hadoop/hive/ql/Driver.java | 19 +- .../hadoop/hive/ql/exec/ColumnStatsTask.java | 4 + .../hadoop/hive/ql/exec/CommonJoinOperator.java | 1 + .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 4 + .../apache/hadoop/hive/ql/exec/ExplainTask.java | 6 +- .../hadoop/hive/ql/exec/FileSinkOperator.java | 4 +- .../hadoop/hive/ql/exec/GroupByOperator.java | 2 +- .../hadoop/hive/ql/exec/JoinOperator.java | 2 - .../hadoop/hive/ql/exec/LimitOperator.java | 1 + .../hadoop/hive/ql/exec/ListSinkOperator.java | 1 + .../apache/hadoop/hive/ql/exec/MoveTask.java | 4 + .../apache/hadoop/hive/ql/exec/Operator.java | 50 +- .../hadoop/hive/ql/exec/ReduceSinkOperator.java | 1 + .../hive/ql/exec/SerializationUtilities.java | 22 + .../apache/hadoop/hive/ql/exec/StatsTask.java | 9 +- .../hadoop/hive/ql/exec/TableScanOperator.java | 1 + .../hadoop/hive/ql/exec/UDTFOperator.java | 1 + .../apache/hadoop/hive/ql/hooks/ATSHook.java | 192 +- .../hadoop/hive/ql/optimizer/Optimizer.java | 3 +- .../physical/AnnotateRunTimeStatsOptimizer.java | 174 + .../optimizer/physical/PhysicalOptimizer.java | 7 +- .../ql/parse/ColumnStatsAutoGatherContext.java | 13 +- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 1 + .../hive/ql/parse/ExplainConfiguration.java | 117 + .../parse/ExplainSQRewriteSemanticAnalyzer.java | 3 +- .../hive/ql/parse/ExplainSemanticAnalyzer.java | 137 +- .../hadoop/hive/ql/parse/GenTezUtils.java | 7 +- .../apache/hadoop/hive/ql/parse/HiveParser.g | 2 +- .../hadoop/hive/ql/parse/MapReduceCompiler.java | 2 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 17 +- .../hive/ql/parse/SemanticAnalyzer.java.orig | 13038 +++++++++++++++++ .../hive/ql/parse/SubQueryDiagnostic.java | 2 +- .../hadoop/hive/ql/parse/TaskCompiler.java | 5 + .../hadoop/hive/ql/parse/TezCompiler.java | 12 +- .../hive/ql/parse/spark/SparkCompiler.java | 8 +- .../hive/ql/plan/AbstractOperatorDesc.java | 11 + .../apache/hadoop/hive/ql/plan/ExplainWork.java | 66 +- .../hadoop/hive/ql/plan/FileSinkDesc.java | 9 - .../hadoop/hive/ql/plan/MergeJoinWork.java | 8 +- .../hadoop/hive/ql/plan/OperatorDesc.java | 2 + .../apache/hadoop/hive/ql/plan/Statistics.java | 22 +- .../parse/TestUpdateDeleteSemanticAnalyzer.java | 4 +- .../queries/clientpositive/explainanalyze_1.q | 38 + .../queries/clientpositive/explainanalyze_2.q | 329 + .../queries/clientpositive/explainanalyze_3.q | 158 + .../queries/clientpositive/explainanalyze_4.q | 103 + .../queries/clientpositive/explainanalyze_5.q | 81 + .../clientpositive/columnstats_partlvl.q.out | 48 + .../clientpositive/columnstats_partlvl_dp.q.out | 28 + .../clientpositive/columnstats_quoting.q.out | 12 + .../clientpositive/columnstats_tbllvl.q.out | 
42 + .../clientpositive/compute_stats_date.q.out | 6 + .../clientpositive/constant_prop_2.q.out | 7 + .../display_colstats_tbllvl.q.out | 18 + .../dynpart_sort_optimization_acid.q.out | 80 +- .../exec_parallel_column_stats.q.out | 6 + .../temp_table_display_colstats_tbllvl.q.out | 18 + .../clientpositive/tez/explainanalyze_1.q.out | 471 + .../clientpositive/tez/explainanalyze_2.q.out | 4827 ++++++ .../clientpositive/tez/explainanalyze_3.q.out | 46 +- .../clientpositive/tez/explainanalyze_4.q.out | 590 + .../clientpositive/tez/explainanalyze_5.q.out | 445 + 67 files changed, 23338 insertions(+), 279 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 1466b69..6530cb8 100644 --- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -83,6 +83,8 @@ public class StatsSetupConst { */ public static final String ROW_COUNT = "numRows"; + public static final String RUN_TIME_ROW_COUNT = "runTimeNumRows"; + /** * The name of the statistic Raw Data Size to be published or gathered. */ http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 3ab2714..9720f76 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -388,6 +388,11 @@ minitez.query.files=acid_vectorization_missing_cols.q,\ dynamic_partition_pruning.q,\ dynamic_partition_pruning_2.q,\ bucketpruning1.q,\ + explainanalyze_1.q,\ + explainanalyze_2.q,\ + explainanalyze_3.q,\ + explainanalyze_4.q,\ + explainanalyze_5.q,\ explainuser_1.q,\ explainuser_2.q,\ explainuser_3.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/itests/src/test/resources/testconfiguration.properties.orig ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties.orig b/itests/src/test/resources/testconfiguration.properties.orig new file mode 100644 index 0000000..d5ee9ed --- /dev/null +++ b/itests/src/test/resources/testconfiguration.properties.orig @@ -0,0 +1,1373 @@ +# NOTE: files should be listed in alphabetical order +minimr.query.files=auto_sortmerge_join_16.q,\ + bucket4.q,\ + bucket5.q,\ + bucket6.q,\ + bucket_many.q,\ + bucket_num_reducers.q,\ + bucket_num_reducers2.q,\ + bucketizedhiveinputformat.q,\ + bucketmapjoin6.q,\ + bucketmapjoin7.q,\ + disable_merge_for_bucketing.q,\ + empty_dir_in_table.q,\ + exchgpartition2lel.q,\ + external_table_with_space_in_location_path.q,\ + file_with_header_footer.q,\ + groupby2.q,\ + import_exported_table.q,\ + index_bitmap3.q,\ + index_bitmap_auto.q,\ + infer_bucket_sort_bucketed_table.q,\ + infer_bucket_sort_dyn_part.q,\ + infer_bucket_sort_map_operators.q,\ + infer_bucket_sort_merge.q,\ + infer_bucket_sort_num_buckets.q,\ + infer_bucket_sort_reducers_power_two.q,\ + input16_cc.q,\ + insert_dir_distcp.q,\ + join1.q,\ + join_acid_non_acid.q,\ + 
leftsemijoin_mr.q,\ + list_bucket_dml_10.q,\ + load_fs2.q,\ + load_hdfs_file_with_space_in_the_name.q,\ + non_native_window_udf.q, \ + orc_merge_diff_fs.q,\ + parallel_orderby.q,\ + quotedid_smb.q,\ + reduce_deduplicate.q,\ + remote_script.q,\ + root_dir_external_table.q,\ + schemeAuthority.q,\ + schemeAuthority2.q,\ + scriptfile1.q,\ + scriptfile1_win.q,\ + skewjoin_onesideskew.q,\ + table_nonprintable.q,\ + temp_table_external.q,\ + truncate_column_buckets.q,\ + uber_reduce.q,\ + udf_using.q + +# These tests are disabled for minimr +# ql_rewrite_gbtoidx.q,\ +# ql_rewrite_gbtoidx_cbo_1.q,\ +# ql_rewrite_gbtoidx_cbo_2.q,\ +# smb_mapjoin_8.q,\ + + +# Tests that are not enabled for CLI Driver +disabled.query.files=ql_rewrite_gbtoidx.q,\ + ql_rewrite_gbtoidx_cbo_1.q,\ + ql_rewrite_gbtoidx_cbo_2.q,\ + rcfile_merge1.q,\ + smb_mapjoin_8.q + +# NOTE: Add tests to minitez only if it is very +# specific to tez and cannot be added to minillap. +minitez.query.files.shared=delete_orig_table.q,\ + orc_merge12.q,\ + orc_vectorization_ppd.q,\ + unionDistinct_2.q,\ + update_orig_table.q,\ + vector_join_part_col_char.q,\ + vector_non_string_partition.q,\ + vectorization_div0.q,\ + vectorization_limit.q + +# NOTE: Add tests to minitez only if it is very +# specific to tez and cannot be added to minillap. +minitez.query.files=explainuser_3.q,\ + hybridgrace_hashjoin_1.q,\ + hybridgrace_hashjoin_2.q,\ + partition_column_names_with_leading_and_trailing_spaces.q,\ + stats_filemetadata.q,\ + tez_union_with_udf.q + +minillap.shared.query.files=acid_globallimit.q,\ + alter_merge_2_orc.q,\ + alter_merge_orc.q,\ + alter_merge_stats_orc.q,\ + auto_join0.q,\ + auto_join1.q,\ + auto_join21.q,\ + auto_join29.q,\ + auto_join30.q,\ + auto_join_filters.q,\ + auto_join_nulls.q,\ + auto_sortmerge_join_1.q,\ + auto_sortmerge_join_10.q,\ + auto_sortmerge_join_11.q,\ + auto_sortmerge_join_12.q,\ + auto_sortmerge_join_13.q,\ + auto_sortmerge_join_14.q,\ + auto_sortmerge_join_15.q,\ + auto_sortmerge_join_16.q,\ + auto_sortmerge_join_2.q,\ + auto_sortmerge_join_3.q,\ + auto_sortmerge_join_4.q,\ + auto_sortmerge_join_5.q,\ + auto_sortmerge_join_6.q,\ + auto_sortmerge_join_7.q,\ + auto_sortmerge_join_8.q,\ + auto_sortmerge_join_9.q,\ + bucket2.q,\ + bucket3.q,\ + bucket4.q,\ + bucket_map_join_tez1.q,\ + bucket_map_join_tez2.q,\ + cbo_gby.q,\ + cbo_gby_empty.q,\ + cbo_join.q,\ + cbo_limit.q,\ + cbo_semijoin.q,\ + cbo_simple_select.q,\ + cbo_stats.q,\ + cbo_subq_exists.q,\ + cbo_subq_in.q,\ + cbo_subq_not_in.q,\ + cbo_udf_udaf.q,\ + cbo_union.q,\ + cbo_views.q,\ + cbo_windowing.q,\ + column_names_with_leading_and_trailing_spaces.q,\ + constprog_dpp.q,\ + constprog_semijoin.q,\ + correlationoptimizer1.q,\ + count.q,\ + create_merge_compressed.q,\ + cross_join.q,\ + cross_product_check_1.q,\ + cross_product_check_2.q,\ + ctas.q,\ + cte_1.q,\ + cte_2.q,\ + cte_3.q,\ + cte_4.q,\ + cte_5.q,\ + cte_mat_1.q,\ + cte_mat_2.q,\ + cte_mat_3.q,\ + cte_mat_4.q,\ + cte_mat_5.q,\ + custom_input_output_format.q,\ + deleteAnalyze.q,\ + delete_all_non_partitioned.q,\ + delete_all_partitioned.q,\ + delete_tmp_table.q,\ + delete_where_no_match.q,\ + delete_where_non_partitioned.q,\ + delete_where_partitioned.q,\ + delete_whole_partition.q,\ + disable_merge_for_bucketing.q,\ + dynamic_partition_pruning.q,\ + dynamic_partition_pruning_2.q,\ + dynpart_sort_opt_vectorization.q,\ + dynpart_sort_optimization.q,\ + dynpart_sort_optimization2.q,\ + empty_join.q,\ + enforce_order.q,\ + filter_join_breaktask.q,\ + filter_join_breaktask2.q,\ + groupby1.q,\ + 
groupby2.q,\ + groupby3.q,\ + having.q,\ + identity_project_remove_skip.q,\ + insert1.q,\ + insert_into1.q,\ + insert_into2.q,\ + insert_orig_table.q,\ + insert_update_delete.q,\ + insert_values_dynamic_partitioned.q,\ + insert_values_non_partitioned.q,\ + insert_values_orig_table.,\ + insert_values_partitioned.q,\ + insert_values_tmp_table.q,\ + join0.q,\ + join1.q,\ + join_nullsafe.q,\ + leftsemijoin.q,\ + limit_pushdown.q,\ + llap_nullscan.q,\ + llapdecider.q,\ + load_dyn_part1.q,\ + load_dyn_part2.q,\ + load_dyn_part3.q,\ + lvj_mapjoin.q,\ + mapjoin2.q,\ + mapjoin_decimal.q,\ + mapjoin_mapjoin.q,\ + mapreduce1.q,\ + mapreduce2.q,\ + merge1.q,\ + merge2.q,\ + mergejoin.q,\ + metadata_only_queries.q,\ + metadata_only_queries_with_filters.q,\ + metadataonly1.q,\ + mrr.q,\ + nonmr_fetch_threshold.q,\ + optimize_nullscan.q,\ + orc_analyze.q,\ + orc_merge1.q,\ + orc_merge10.q,\ + orc_merge11.q,\ + orc_merge2.q,\ + orc_merge3.q,\ + orc_merge4.q,\ + orc_merge5.q,\ + orc_merge6.q,\ + orc_merge7.q,\ + orc_merge8.q,\ + orc_merge9.q,\ + orc_merge_incompat1.q,\ + orc_merge_incompat2.q,\ + orc_merge_incompat3.q,\ + orc_ppd_basic.q,\ + orc_ppd_schema_evol_1a.q,\ + orc_ppd_schema_evol_1b.q,\ + orc_ppd_schema_evol_2a.q,\ + orc_ppd_schema_evol_2b.q,\ + orc_ppd_schema_evol_3a.q,\ + order_null.q,\ + parallel.q,\ + ptf.q,\ + ptf_matchpath.q,\ + ptf_streaming.q,\ + sample1.q,\ + script_env_var1.q,\ + script_env_var2.q,\ + script_pipe.q,\ + scriptfile1.q,\ + selectDistinctStar.q,\ + select_dummy_source.q,\ + skewjoin.q,\ + stats_noscan_1.q,\ + stats_only_null.q,\ + subquery_exists.q,\ + subquery_in.q,\ + temp_table.q,\ + tez_bmj_schema_evolution.q,\ + tez_dml.q,\ + tez_dynpart_hashjoin_1.q,\ + tez_dynpart_hashjoin_2.q,\ + tez_fsstat.q,\ + tez_insert_overwrite_local_directory_1.q,\ + tez_join.q,\ + tez_join_hash.q,\ + tez_join_result_complex.q,\ + tez_join_tests.q,\ + tez_joins_explain.q,\ + tez_multi_union.q,\ + tez_schema_evolution.q,\ + tez_self_join.q,\ + tez_smb_1.q,\ + tez_smb_main.q,\ + tez_union.q,\ + tez_union2.q,\ + tez_union_decimal.q,\ + tez_union_dynamic_partition.q,\ + tez_union_group_by.q,\ + tez_union_multiinsert.q,\ + tez_union_view.q,\ + tez_vector_dynpart_hashjoin_1.q,\ + tez_vector_dynpart_hashjoin_2.q,\ + transform1.q,\ + transform2.q,\ + transform_ppr1.q,\ + transform_ppr2.q,\ + union2.q,\ + union3.q,\ + union4.q,\ + union5.q,\ + union6.q,\ + union7.q,\ + union8.q,\ + union9.q,\ + unionDistinct_1.q,\ + union_fast_stats.q,\ + union_stats.q,\ + union_type_chk.q,\ + update_after_multiple_inserts.q,\ + update_all_non_partitioned.q,\ + update_all_partitioned.q,\ + update_all_types.q,\ + update_tmp_table.q,\ + update_two_cols.q,\ + update_where_no_match.q,\ + update_where_non_partitioned.q,\ + update_where_partitioned.q,\ + vector_acid3.q,\ + vector_aggregate_9.q,\ + vector_aggregate_without_gby.q,\ + vector_auto_smb_mapjoin_14.q,\ + vector_between_columns.q,\ + vector_between_in.q,\ + vector_binary_join_groupby.q,\ + vector_bround.q,\ + vector_bucket.q,\ + vector_cast_constant.q,\ + vector_char_2.q,\ + vector_char_4.q,\ + vector_char_cast.q,\ + vector_char_mapjoin1.q,\ + vector_char_simple.q,\ + vector_coalesce.q,\ + vector_coalesce_2.q,\ + vector_complex_all.q,\ + vector_complex_join.q,\ + vector_count.q,\ + vector_count_distinct.q,\ + vector_data_types.q,\ + vector_date_1.q,\ + vector_decimal_1.q,\ + vector_decimal_10_0.q,\ + vector_decimal_2.q,\ + vector_decimal_3.q,\ + vector_decimal_4.q,\ + vector_decimal_5.q,\ + vector_decimal_6.q,\ + vector_decimal_aggregate.q,\ + 
vector_decimal_cast.q,\ + vector_decimal_expressions.q,\ + vector_decimal_mapjoin.q,\ + vector_decimal_math_funcs.q,\ + vector_decimal_precision.q,\ + vector_decimal_round.q,\ + vector_decimal_round_2.q,\ + vector_decimal_trailing.q,\ + vector_decimal_udf.q,\ + vector_decimal_udf2.q,\ + vector_distinct_2.q,\ + vector_elt.q,\ + vector_groupby4.q,\ + vector_groupby6.q,\ + vector_groupby_3.q,\ + vector_groupby_mapjoin.q,\ + vector_groupby_reduce.q,\ + vector_grouping_sets.q,\ + vector_if_expr.q,\ + vector_include_no_sel.q,\ + vector_inner_join.q,\ + vector_interval_1.q,\ + vector_interval_2.q,\ + vector_interval_arithmetic.q,\ + vector_interval_mapjoin.q,\ + vector_join30.q,\ + vector_join_filters.q,\ + vector_join_nulls.q,\ + vector_left_outer_join.q,\ + vector_left_outer_join2.q,\ + vector_leftsemi_mapjoin.q,\ + vector_mapjoin_reduce.q,\ + vector_mr_diff_schema_alias.q,\ + vector_multi_insert.q,\ + vector_null_projection.q,\ + vector_nullsafe_join.q,\ + vector_nvl.q,\ + vector_orderby_5.q,\ + vector_outer_join0.q,\ + vector_outer_join1.q,\ + vector_outer_join2.q,\ + vector_outer_join3.q,\ + vector_outer_join4.q,\ + vector_outer_join5.q,\ + vector_outer_join6.q,\ + vector_partition_diff_num_cols.q,\ + vector_partitioned_date_time.q,\ + vector_reduce1.q,\ + vector_reduce2.q,\ + vector_reduce3.q,\ + vector_reduce_groupby_decimal.q,\ + vector_string_concat.q,\ + vector_struct_in.q,\ + vector_varchar_4.q,\ + vector_varchar_mapjoin1.q,\ + vector_varchar_simple.q,\ + vector_when_case_null.q,\ + vectorization_0.q,\ + vectorization_1.q,\ + vectorization_10.q,\ + vectorization_11.q,\ + vectorization_12.q,\ + vectorization_13.q,\ + vectorization_14.q,\ + vectorization_15.q,\ + vectorization_16.q,\ + vectorization_17.q,\ + vectorization_2.q,\ + vectorization_3.q,\ + vectorization_4.q,\ + vectorization_5.q,\ + vectorization_6.q,\ + vectorization_7.q,\ + vectorization_8.q,\ + vectorization_9.q,\ + vectorization_decimal_date.q,\ + vectorization_nested_udf.q,\ + vectorization_not.q,\ + vectorization_part.q,\ + vectorization_part_project.q,\ + vectorization_part_varchar.q,\ + vectorization_pushdown.q,\ + vectorization_short_regress.q,\ + vectorized_bucketmapjoin1.q,\ + vectorized_case.q,\ + vectorized_casts.q,\ + vectorized_context.q,\ + vectorized_date_funcs.q,\ + vectorized_distinct_gby.q,\ + vectorized_dynamic_partition_pruning.q,\ + vectorized_mapjoin.q,\ + vectorized_math_funcs.q,\ + vectorized_nested_mapjoin.q,\ + vectorized_parquet.q,\ + vectorized_parquet_types.q,\ + vectorized_ptf.q,\ + vectorized_rcfile_columnar.q,\ + vectorized_shufflejoin.q,\ + vectorized_string_funcs.q,\ + vectorized_timestamp.q,\ + vectorized_timestamp_funcs.q,\ + vectorized_timestamp_ints_casts.q + +minillap.query.files=acid_bucket_pruning.q,\ + acid_vectorization_missing_cols.q,\ + bucket_map_join_tez1.q,\ + bucket_map_join_tez2.q,\ + bucketpruning1.q,\ + constprog_dpp.q,\ + dynamic_partition_pruning.q,\ + dynamic_partition_pruning_2.q,\ + explainuser_1.q,\ + explainuser_2.q,\ + explainuser_4.q,\ + hybridgrace_hashjoin_1.q,\ + hybridgrace_hashjoin_2.q,\ + llap_nullscan.q,\ + llap_udf.q,\ + llapdecider.q,\ + lvj_mapjoin.q,\ + mapjoin_decimal.q,\ + mergejoin_3way.q,\ + mrr.q,\ + orc_llap.q,\ + orc_llap_counters.q,\ + orc_llap_counters1.q,\ + orc_llap_nonvector.q,\ + orc_merge_diff_fs.q,\ + orc_ppd_basic.q,\ + schema_evol_orc_acid_part.q,\ + schema_evol_orc_acid_part_update.q,\ + schema_evol_orc_acid_table.q,\ + schema_evol_orc_acid_table_update.q,\ + schema_evol_orc_acidvec_part.q,\ + schema_evol_orc_acidvec_part_update.q,\ + 
schema_evol_orc_acidvec_table.q,\ + schema_evol_orc_acidvec_table_update.q,\ + schema_evol_orc_nonvec_part.q,\ + schema_evol_orc_nonvec_part_all_complex.q,\ + schema_evol_orc_nonvec_part_all_primitive.q,\ + schema_evol_orc_nonvec_table.q,\ + schema_evol_orc_vec_part.q,\ + schema_evol_orc_vec_part_all_complex.q,\ + schema_evol_orc_vec_part_all_primitive.q,\ + schema_evol_orc_vec_table.q,\ + schema_evol_stats.q,\ + schema_evol_text_nonvec_part.q,\ + schema_evol_text_nonvec_part_all_complex.q,\ + schema_evol_text_nonvec_part_all_primitive.q,\ + schema_evol_text_nonvec_table.q,\ + schema_evol_text_vec_part.q,\ + schema_evol_text_vec_part_all_complex.q,\ + schema_evol_text_vec_part_all_primitive.q,\ + schema_evol_text_vec_table.q,\ + schema_evol_text_vecrow_part.q,\ + schema_evol_text_vecrow_part_all_complex.q,\ + schema_evol_text_vecrow_part_all_primitive.q,\ + schema_evol_text_vecrow_table.q,\ + smb_cache.q,\ + tez_aggr_part_stats.q,\ + tez_bmj_schema_evolution.q,\ + tez_dml.q,\ + tez_dynpart_hashjoin_1.q,\ + tez_dynpart_hashjoin_2.q,\ + tez_dynpart_hashjoin_3.q,\ + tez_fsstat.q,\ + tez_insert_overwrite_local_directory_1.q,\ + tez_join.q,\ + tez_join_result_complex.q,\ + tez_join_tests.q,\ + tez_joins_explain.q,\ + tez_multi_union.q,\ + tez_schema_evolution.q,\ + tez_self_join.q,\ + tez_smb_1.q,\ + tez_smb_empty.q,\ + tez_smb_main.q,\ + tez_union.q,\ + tez_union2.q,\ + tez_union_decimal.q,\ + tez_union_dynamic_partition.q,\ + tez_union_group_by.q,\ + tez_union_multiinsert.q,\ + tez_union_view.q,\ + tez_vector_dynpart_hashjoin_1.q,\ + tez_vector_dynpart_hashjoin_2.q,\ + vectorized_dynamic_partition_pruning.q,\ + windowing_gby.q + +encrypted.query.files=encryption_join_unencrypted_tbl.q,\ + encryption_insert_partition_static.q,\ + encryption_insert_partition_dynamic.q,\ + encryption_join_with_different_encryption_keys.q,\ + encryption_select_read_only_encrypted_tbl.q,\ + encryption_select_read_only_unencrypted_tbl.q,\ + encryption_load_data_to_encrypted_tables.q, \ + encryption_unencrypted_nonhdfs_external_tables.q \ + encryption_move_tbl.q \ + encryption_drop_table.q \ + encryption_insert_values.q \ + encryption_drop_view.q \ + encryption_drop_partition.q \ + encryption_with_trash.q \ + encryption_ctas.q + +beeline.positive.exclude=add_part_exist.q,\ + alter1.q,\ + alter2.q,\ + alter4.q,\ + alter5.q,\ + alter_rename_partition.q,\ + alter_rename_partition_authorization.q,\ + archive.q,\ + archive_corrupt.q,\ + archive_mr_1806.q,\ + archive_multi.q,\ + archive_multi_mr_1806.q,\ + authorization_1.q,\ + authorization_2.q,\ + authorization_4.q,\ + authorization_5.q,\ + authorization_6.q,\ + authorization_7.q,\ + ba_table1.q,\ + ba_table2.q,\ + ba_table3.q,\ + ba_table_udfs.q,\ + binary_table_bincolserde.q,\ + binary_table_colserde.q,\ + cluster.q,\ + columnarserde_create_shortcut.q,\ + combine2.q,\ + constant_prop.q,\ + create_nested_type.q,\ + create_or_replace_view.q,\ + create_struct_table.q,\ + create_union_table.q,\ + database.q,\ + database_location.q,\ + database_properties.q,\ + describe_database_json.q,\ + drop_database_removes_partition_dirs.q,\ + escape1.q,\ + escape2.q,\ + exim_00_nonpart_empty.q,\ + exim_01_nonpart.q,\ + exim_02_00_part_empty.q,\ + exim_02_part.q,\ + exim_03_nonpart_over_compat.q,\ + exim_04_all_part.q,\ + exim_04_evolved_parts.q,\ + exim_05_some_part.q,\ + exim_06_one_part.q,\ + exim_07_all_part_over_nonoverlap.q,\ + exim_08_nonpart_rename.q,\ + exim_09_part_spec_nonoverlap.q,\ + exim_10_external_managed.q,\ + exim_11_managed_external.q,\ + 
exim_12_external_location.q,\ + exim_13_managed_location.q,\ + exim_14_managed_location_over_existing.q,\ + exim_15_external_part.q,\ + exim_16_part_external.q,\ + exim_17_part_managed.q,\ + exim_18_part_external.q,\ + exim_19_00_part_external_location.q,\ + exim_19_part_external_location.q,\ + exim_20_part_managed_location.q,\ + exim_21_export_authsuccess.q,\ + exim_22_import_exist_authsuccess.q,\ + exim_23_import_part_authsuccess.q,\ + exim_24_import_nonexist_authsuccess.q,\ + global_limit.q,\ + groupby_complex_types.q,\ + groupby_complex_types_multi_single_reducer.q,\ + index_auth.q,\ + index_auto.q,\ + index_auto_empty.q,\ + index_bitmap.q,\ + index_bitmap1.q,\ + index_bitmap2.q,\ + index_bitmap3.q,\ + index_bitmap_auto.q,\ + index_bitmap_rc.q,\ + index_compact.q,\ + index_compact_1.q,\ + index_compact_2.q,\ + index_compact_3.q,\ + index_stale_partitioned.q,\ + init_file.q,\ + input16.q,\ + input16_cc.q,\ + input46.q,\ + input_columnarserde.q,\ + input_dynamicserde.q,\ + input_lazyserde.q,\ + input_testxpath3.q,\ + input_testxpath4.q,\ + insert2_overwrite_partitions.q,\ + insertexternal1.q,\ + join_thrift.q,\ + lateral_view.q,\ + load_binary_data.q,\ + load_exist_part_authsuccess.q,\ + load_nonpart_authsuccess.q,\ + load_part_authsuccess.q,\ + loadpart_err.q,\ + lock1.q,\ + lock2.q,\ + lock3.q,\ + lock4.q,\ + merge_dynamic_partition.q,\ + multi_insert.q,\ + multi_insert_move_tasks_share_dependencies.q,\ + null_column.q,\ + ppd_clusterby.q,\ + query_with_semi.q,\ + rename_column.q,\ + sample6.q,\ + sample_islocalmode_hook.q,\ + set_processor_namespaces.q,\ + show_tables.q,\ + source.q,\ + split_sample.q,\ + str_to_map.q,\ + transform1.q,\ + udaf_collect_set.q,\ + udaf_context_ngrams.q,\ + udaf_histogram_numeric.q,\ + udaf_ngrams.q,\ + udaf_percentile_approx.q,\ + udf_array.q,\ + udf_bitmap_and.q,\ + udf_bitmap_or.q,\ + udf_explode.q,\ + udf_format_number.q,\ + udf_map.q,\ + udf_map_keys.q,\ + udf_map_values.q,\ + udf_mask.q,\ + udf_mask_first_n.q,\ + udf_mask_hash.q,\ + udf_mask_last_n.q,\ + udf_mask_show_first_n.q,\ + udf_mask_show_last_n.q,\ + udf_max.q,\ + udf_min.q,\ + udf_named_struct.q,\ + udf_percentile.q,\ + udf_printf.q,\ + udf_sentences.q,\ + udf_sort_array.q,\ + udf_split.q,\ + udf_struct.q,\ + udf_substr.q,\ + udf_translate.q,\ + udf_union.q,\ + udf_xpath.q,\ + udtf_stack.q,\ + view.q,\ + virtual_column.q + +minimr.query.negative.files=cluster_tasklog_retrieval.q,\ + file_with_header_footer_negative.q,\ + local_mapred_error_cache.q,\ + mapreduce_stack_trace.q,\ + mapreduce_stack_trace_hadoop20.q,\ + mapreduce_stack_trace_turnoff.q,\ + mapreduce_stack_trace_turnoff_hadoop20.q,\ + minimr_broken_pipe.q,\ + table_nonprintable_negative.q,\ + udf_local_resource.q + +# tests are sorted use: perl -pe 's@\\\s*\n@ @g' testconfiguration.properties \ +# | awk -F= '/spark.query.files/{print $2}' | perl -pe 's@.q *, *@\n@g' \ +# | egrep -v '^ *$' | sort -V | uniq | perl -pe 's@\n@.q, \\\n@g' | perl -pe 's@^@ @g' +spark.query.files=add_part_multiple.q, \ + alter_merge_orc.q, \ + alter_merge_stats_orc.q, \ + annotate_stats_join.q, \ + auto_join0.q, \ + auto_join1.q, \ + auto_join10.q, \ + auto_join11.q, \ + auto_join12.q, \ + auto_join13.q, \ + auto_join14.q, \ + auto_join15.q, \ + auto_join16.q, \ + auto_join17.q, \ + auto_join18.q, \ + auto_join18_multi_distinct.q, \ + auto_join19.q, \ + auto_join2.q, \ + auto_join20.q, \ + auto_join21.q, \ + auto_join22.q, \ + auto_join23.q, \ + auto_join24.q, \ + auto_join26.q, \ + auto_join27.q, \ + auto_join28.q, \ + auto_join29.q, \ + auto_join3.q, \ 
+ auto_join30.q, \ + auto_join31.q, \ + auto_join4.q, \ + auto_join5.q, \ + auto_join6.q, \ + auto_join7.q, \ + auto_join8.q, \ + auto_join9.q, \ + auto_join_filters.q, \ + auto_join_nulls.q, \ + auto_join_reordering_values.q, \ + auto_join_stats.q, \ + auto_join_stats2.q, \ + auto_join_without_localtask.q, \ + auto_smb_mapjoin_14.q, \ + auto_sortmerge_join_1.q, \ + auto_sortmerge_join_10.q, \ + auto_sortmerge_join_12.q, \ + auto_sortmerge_join_13.q, \ + auto_sortmerge_join_14.q, \ + auto_sortmerge_join_15.q, \ + auto_sortmerge_join_16.q, \ + auto_sortmerge_join_3.q, \ + auto_sortmerge_join_4.q, \ + auto_sortmerge_join_5.q, \ + auto_sortmerge_join_6.q, \ + auto_sortmerge_join_7.q, \ + auto_sortmerge_join_8.q, \ + auto_sortmerge_join_9.q, \ + avro_compression_enabled_native.q, \ + avro_decimal_native.q, \ + avro_joins.q, \ + avro_joins_native.q, \ + bucket2.q, \ + bucket3.q, \ + bucket4.q, \ + bucket_map_join_1.q, \ + bucket_map_join_2.q, \ + bucket_map_join_spark1.q, \ + bucket_map_join_spark2.q, \ + bucket_map_join_spark3.q, \ + bucket_map_join_spark4.q, \ + bucket_map_join_tez1.q, \ + bucket_map_join_tez2.q, \ + bucketmapjoin1.q, \ + bucketmapjoin10.q, \ + bucketmapjoin11.q, \ + bucketmapjoin12.q, \ + bucketmapjoin13.q, \ + bucketmapjoin2.q, \ + bucketmapjoin3.q, \ + bucketmapjoin4.q, \ + bucketmapjoin5.q, \ + bucketmapjoin7.q, \ + bucketmapjoin8.q, \ + bucketmapjoin9.q, \ + bucketmapjoin_negative.q, \ + bucketmapjoin_negative2.q, \ + bucketmapjoin_negative3.q, \ + bucketsortoptimize_insert_2.q, \ + bucketsortoptimize_insert_4.q, \ + bucketsortoptimize_insert_6.q, \ + bucketsortoptimize_insert_7.q, \ + bucketsortoptimize_insert_8.q, \ + cbo_gby.q, \ + cbo_gby_empty.q, \ + cbo_limit.q, \ + cbo_semijoin.q, \ + cbo_simple_select.q, \ + cbo_stats.q, \ + cbo_subq_in.q, \ + cbo_subq_not_in.q, \ + cbo_udf_udaf.q, \ + cbo_union.q, \ + column_access_stats.q, \ + count.q, \ + create_merge_compressed.q, \ + cross_join.q, \ + cross_product_check_1.q, \ + cross_product_check_2.q, \ + ctas.q, \ + custom_input_output_format.q, \ + date_join1.q, \ + date_udf.q, \ + decimal_1_1.q, \ + decimal_join.q, \ + disable_merge_for_bucketing.q, \ + dynamic_rdd_cache.q, \ + enforce_order.q, \ + escape_clusterby1.q, \ + escape_distributeby1.q, \ + escape_orderby1.q, \ + escape_sortby1.q, \ + filter_join_breaktask.q, \ + filter_join_breaktask2.q, \ + groupby1.q, \ + groupby10.q, \ + groupby11.q, \ + groupby1_map.q, \ + groupby1_map_nomap.q, \ + groupby1_map_skew.q, \ + groupby1_noskew.q, \ + groupby2.q, \ + groupby2_map.q, \ + groupby2_map_multi_distinct.q, \ + groupby2_map_skew.q, \ + groupby2_noskew.q, \ + groupby2_noskew_multi_distinct.q, \ + groupby3.q, \ + groupby3_map.q, \ + groupby3_map_multi_distinct.q, \ + groupby3_map_skew.q, \ + groupby3_noskew.q, \ + groupby3_noskew_multi_distinct.q, \ + groupby4.q, \ + groupby4_map.q, \ + groupby4_map_skew.q, \ + groupby4_noskew.q, \ + groupby5.q, \ + groupby5_map.q, \ + groupby5_map_skew.q, \ + groupby5_noskew.q, \ + groupby6.q, \ + groupby6_map.q, \ + groupby6_map_skew.q, \ + groupby6_noskew.q, \ + groupby7.q, \ + groupby7_map.q, \ + groupby7_map_multi_single_reducer.q, \ + groupby7_map_skew.q, \ + groupby7_noskew.q, \ + groupby7_noskew_multi_single_reducer.q, \ + groupby8.q, \ + groupby8_map.q, \ + groupby8_map_skew.q, \ + groupby8_noskew.q, \ + groupby9.q, \ + groupby_bigdata.q, \ + groupby_complex_types.q, \ + groupby_complex_types_multi_single_reducer.q, \ + groupby_cube1.q, \ + groupby_grouping_id2.q, \ + groupby_map_ppr.q, \ + groupby_map_ppr_multi_distinct.q, \ 
+ groupby_multi_insert_common_distinct.q, \ + groupby_multi_single_reducer.q, \ + groupby_multi_single_reducer2.q, \ + groupby_multi_single_reducer3.q, \ + groupby_position.q, \ + groupby_ppr.q, \ + groupby_ppr_multi_distinct.q, \ + groupby_resolution.q, \ + groupby_rollup1.q, \ + groupby_sort_1_23.q, \ + groupby_sort_skew_1.q, \ + groupby_sort_skew_1_23.q, \ + qroupby_limit_extrastep.q, \ + having.q, \ + identity_project_remove_skip.q, \ + index_auto_self_join.q, \ + innerjoin.q, \ + input12.q, \ + input13.q, \ + input14.q, \ + input17.q, \ + input18.q, \ + input1_limit.q, \ + input_part2.q, \ + insert_into1.q, \ + insert_into2.q, \ + insert_into3.q, \ + join0.q, \ + join1.q, \ + join10.q, \ + join11.q, \ + join12.q, \ + join13.q, \ + join14.q, \ + join15.q, \ + join16.q, \ + join17.q, \ + join18.q, \ + join18_multi_distinct.q, \ + join19.q, \ + join2.q, \ + join20.q, \ + join21.q, \ + join22.q, \ + join23.q, \ + join24.q, \ + join25.q, \ + join26.q, \ + join27.q, \ + join28.q, \ + join29.q, \ + join3.q, \ + join30.q, \ + join31.q, \ + join32.q, \ + join32_lessSize.q, \ + join33.q, \ + join34.q, \ + join35.q, \ + join36.q, \ + join37.q, \ + join38.q, \ + join39.q, \ + join4.q, \ + join41.q, \ + join5.q, \ + join6.q, \ + join7.q, \ + join8.q, \ + join9.q, \ + join_1to1.q, \ + join_alt_syntax.q, \ + join_array.q, \ + join_casesensitive.q, \ + join_cond_pushdown_1.q, \ + join_cond_pushdown_2.q, \ + join_cond_pushdown_3.q, \ + join_cond_pushdown_4.q, \ + join_cond_pushdown_unqual1.q, \ + join_cond_pushdown_unqual2.q, \ + join_cond_pushdown_unqual3.q, \ + join_cond_pushdown_unqual4.q, \ + join_filters_overlap.q, \ + join_hive_626.q, \ + join_literals.q, \ + join_map_ppr.q, \ + join_merge_multi_expressions.q, \ + join_merging.q, \ + join_nullsafe.q, \ + join_rc.q, \ + join_reorder.q, \ + join_reorder2.q, \ + join_reorder3.q, \ + join_reorder4.q, \ + join_star.q, \ + join_thrift.q, \ + join_vc.q, \ + join_view.q, \ + lateral_view_explode2.q, \ + leftsemijoin.q, \ + leftsemijoin_mr.q, \ + limit_partition_metadataonly.q, \ + limit_pushdown.q, \ + list_bucket_dml_2.q, \ + load_dyn_part1.q, \ + load_dyn_part10.q, \ + load_dyn_part11.q, \ + load_dyn_part12.q, \ + load_dyn_part13.q, \ + load_dyn_part14.q, \ + load_dyn_part15.q, \ + load_dyn_part2.q, \ + load_dyn_part3.q, \ + load_dyn_part4.q, \ + load_dyn_part5.q, \ + load_dyn_part6.q, \ + load_dyn_part7.q, \ + load_dyn_part8.q, \ + load_dyn_part9.q, \ + louter_join_ppr.q, \ + mapjoin1.q, \ + mapjoin_addjar.q, \ + mapjoin_decimal.q, \ + mapjoin_distinct.q, \ + mapjoin_filter_on_outerjoin.q, \ + mapjoin_mapjoin.q, \ + mapjoin_memcheck.q, \ + mapjoin_subquery.q, \ + mapjoin_subquery2.q, \ + mapjoin_test_outer.q, \ + mapreduce1.q, \ + mapreduce2.q, \ + merge1.q, \ + merge2.q, \ + mergejoins.q, \ + mergejoins_mixed.q, \ + metadata_only_queries.q, \ + metadata_only_queries_with_filters.q, \ + multi_insert.q, \ + multi_insert_gby.q, \ + multi_insert_gby2.q, \ + multi_insert_gby3.q, \ + multi_insert_lateral_view.q, \ + multi_insert_mixed.q, \ + multi_insert_move_tasks_share_dependencies.q, \ + multi_insert_with_join.q, \ + multi_join_union.q, \ + multi_join_union_src.q, \ + multigroupby_singlemr.q, \ + nullgroup.q, \ + nullgroup2.q, \ + nullgroup4.q, \ + nullgroup4_multi_distinct.q, \ + optimize_nullscan.q, \ + order.q, \ + order2.q, \ + outer_join_ppr.q, \ + parallel.q, \ + parallel_join0.q, \ + parallel_join1.q, \ + parquet_join.q, \ + pcr.q, \ + ppd_gby_join.q, \ + ppd_join.q, \ + ppd_join2.q, \ + ppd_join3.q, \ + ppd_join5.q, \ + ppd_join_filter.q, \ + 
ppd_multi_insert.q, \ + ppd_outer_join1.q, \ + ppd_outer_join2.q, \ + ppd_outer_join3.q, \ + ppd_outer_join4.q, \ + ppd_outer_join5.q, \ + ppd_transform.q, \ + ptf.q, \ + ptf_decimal.q, \ + ptf_general_queries.q, \ + ptf_matchpath.q, \ + ptf_rcfile.q, \ + ptf_register_tblfn.q, \ + ptf_seqfile.q, \ + ptf_streaming.q, \ + rcfile_bigdata.q, \ + reduce_deduplicate_exclude_join.q, \ + router_join_ppr.q, \ + runtime_skewjoin_mapjoin_spark.q, \ + sample1.q, \ + sample10.q, \ + sample2.q, \ + sample3.q, \ + sample4.q, \ + sample5.q, \ + sample6.q, \ + sample7.q, \ + sample8.q, \ + sample9.q, \ + script_env_var1.q, \ + script_env_var2.q, \ + script_pipe.q, \ + scriptfile1.q, \ + semijoin.q, \ + skewjoin.q, \ + skewjoin_noskew.q, \ + skewjoin_union_remove_1.q, \ + skewjoin_union_remove_2.q, \ + skewjoinopt1.q, \ + skewjoinopt10.q, \ + skewjoinopt11.q, \ + skewjoinopt12.q, \ + skewjoinopt13.q, \ + skewjoinopt14.q, \ + skewjoinopt15.q, \ + skewjoinopt16.q, \ + skewjoinopt17.q, \ + skewjoinopt18.q, \ + skewjoinopt19.q, \ + skewjoinopt2.q, \ + skewjoinopt20.q, \ + skewjoinopt3.q, \ + skewjoinopt4.q, \ + skewjoinopt5.q, \ + skewjoinopt6.q, \ + skewjoinopt7.q, \ + skewjoinopt8.q, \ + skewjoinopt9.q, \ + smb_mapjoin_1.q, \ + smb_mapjoin_10.q, \ + smb_mapjoin_11.q, \ + smb_mapjoin_12.q, \ + smb_mapjoin_13.q, \ + smb_mapjoin_14.q, \ + smb_mapjoin_15.q, \ + smb_mapjoin_16.q, \ + smb_mapjoin_17.q, \ + smb_mapjoin_18.q, \ + smb_mapjoin_19.q, \ + smb_mapjoin_2.q, \ + smb_mapjoin_20.q, \ + smb_mapjoin_21.q, \ + smb_mapjoin_22.q, \ + smb_mapjoin_25.q, \ + smb_mapjoin_3.q, \ + smb_mapjoin_4.q, \ + smb_mapjoin_5.q, \ + smb_mapjoin_6.q, \ + smb_mapjoin_7.q, \ + smb_mapjoin_8.q, \ + smb_mapjoin_9.q, \ + sort.q, \ + stats0.q, \ + stats1.q, \ + stats10.q, \ + stats12.q, \ + stats13.q, \ + stats14.q, \ + stats15.q, \ + stats16.q, \ + stats18.q, \ + stats2.q, \ + stats3.q, \ + stats5.q, \ + stats6.q, \ + stats7.q, \ + stats8.q, \ + stats9.q, \ + stats_noscan_1.q, \ + stats_noscan_2.q, \ + stats_only_null.q, \ + stats_partscan_1_23.q, \ + statsfs.q, \ + subquery_exists.q, \ + subquery_in.q, \ + subquery_multiinsert.q, \ + table_access_keys_stats.q, \ + temp_table.q, \ + temp_table_gb1.q, \ + temp_table_join1.q, \ + tez_join_tests.q, \ + tez_joins_explain.q, \ + timestamp_1.q, \ + timestamp_2.q, \ + timestamp_3.q, \ + timestamp_comparison.q, \ + timestamp_lazy.q, \ + timestamp_null.q, \ + timestamp_udf.q, \ + transform2.q, \ + transform_ppr1.q, \ + transform_ppr2.q, \ + udaf_collect_set.q, \ + udf_example_add.q, \ + udf_in_file.q, \ + udf_max.q, \ + udf_min.q, \ + udf_percentile.q, \ + union.q, \ + union10.q, \ + union11.q, \ + union12.q, \ + union13.q, \ + union14.q, \ + union15.q, \ + union16.q, \ + union17.q, \ + union18.q, \ + union19.q, \ + union2.q, \ + union20.q, \ + union21.q, \ + union22.q, \ + union23.q, \ + union24.q, \ + union25.q, \ + union26.q, \ + union27.q, \ + union28.q, \ + union29.q, \ + union3.q, \ + union30.q, \ + union31.q, \ + union32.q, \ + union33.q, \ + union34.q, \ + union4.q, \ + union5.q, \ + union6.q, \ + union7.q, \ + union8.q, \ + union9.q, \ + union_date.q, \ + union_date_trim.q, \ + union_lateralview.q, \ + union_null.q, \ + union_ppr.q, \ + union_remove_1.q, \ + union_remove_10.q, \ + union_remove_11.q, \ + union_remove_12.q, \ + union_remove_13.q, \ + union_remove_14.q, \ + union_remove_15.q, \ + union_remove_16.q, \ + union_remove_17.q, \ + union_remove_18.q, \ + union_remove_19.q, \ + union_remove_2.q, \ + union_remove_20.q, \ + union_remove_21.q, \ + union_remove_22.q, \ + 
union_remove_23.q, \ + union_remove_24.q, \ + union_remove_25.q, \ + union_remove_3.q, \ + union_remove_4.q, \ + union_remove_5.q, \ + union_remove_6.q, \ + union_remove_6_subq.q, \ + union_remove_7.q, \ + union_remove_8.q, \ + union_remove_9.q, \ + union_script.q, \ + union_top_level.q, \ + union_view.q, \ + uniquejoin.q, \ + varchar_join1.q, \ + vector_between_in.q, \ + vector_cast_constant.q, \ + vector_char_4.q, \ + vector_count_distinct.q, \ + vector_data_types.q, \ + vector_decimal_aggregate.q, \ + vector_decimal_mapjoin.q, \ + vector_distinct_2.q, \ + vector_elt.q, \ + vector_groupby_3.q, \ + vector_left_outer_join.q, \ + vector_mapjoin_reduce.q, \ + vector_orderby_5.q, \ + vector_string_concat.q, \ + vector_varchar_4.q, \ + vectorization_0.q, \ + vectorization_1.q, \ + vectorization_10.q, \ + vectorization_11.q, \ + vectorization_12.q, \ + vectorization_13.q, \ + vectorization_14.q, \ + vectorization_15.q, \ + vectorization_16.q, \ + vectorization_17.q, \ + vectorization_2.q, \ + vectorization_3.q, \ + vectorization_4.q, \ + vectorization_5.q, \ + vectorization_6.q, \ + vectorization_9.q, \ + vectorization_decimal_date.q, \ + vectorization_div0.q, \ + vectorization_nested_udf.q, \ + vectorization_not.q, \ + vectorization_part.q, \ + vectorization_part_project.q, \ + vectorization_pushdown.q, \ + vectorization_short_regress.q, \ + vectorized_case.q, \ + vectorized_mapjoin.q, \ + vectorized_math_funcs.q, \ + vectorized_nested_mapjoin.q, \ + vectorized_ptf.q, \ + vectorized_rcfile_columnar.q, \ + vectorized_shufflejoin.q, \ + vectorized_string_funcs.q, \ + vectorized_timestamp_funcs.q, \ + windowing.q + +# Unlike "spark.query.files" above, these tests only run +# under Spark engine. +spark.only.query.files=spark_dynamic_partition_pruning.q,\ + spark_dynamic_partition_pruning_2.q,\ + spark_vectorized_dynamic_partition_pruning.q + +miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\ + bucket4.q,\ + bucket5.q,\ + bucket6.q,\ + bucketizedhiveinputformat.q,\ + bucketmapjoin6.q,\ + bucketmapjoin7.q,\ + constprog_partitioner.q,\ + constprog_semijoin.q,\ + disable_merge_for_bucketing.q,\ + empty_dir_in_table.q,\ + external_table_with_space_in_location_path.q,\ + file_with_header_footer.q,\ + gen_udf_example_add10.q,\ + import_exported_table.q,\ + index_bitmap3.q,\ + index_bitmap_auto.q,\ + infer_bucket_sort_bucketed_table.q,\ + infer_bucket_sort_map_operators.q,\ + infer_bucket_sort_merge.q,\ + infer_bucket_sort_num_buckets.q,\ + infer_bucket_sort_reducers_power_two.q,\ + input16_cc.q,\ + insert_overwrite_directory2.q,\ + leftsemijoin_mr.q,\ + list_bucket_dml_10.q,\ + load_fs2.q,\ + load_hdfs_file_with_space_in_the_name.q,\ + orc_merge1.q,\ + orc_merge2.q,\ + orc_merge3.q,\ + orc_merge4.q,\ + orc_merge5.q,\ + orc_merge6.q,\ + orc_merge7.q,\ + orc_merge8.q,\ + orc_merge9.q,\ + orc_merge_diff_fs.q,\ + orc_merge_incompat1.q,\ + orc_merge_incompat2.q,\ + parallel_orderby.q,\ + quotedid_smb.q,\ + reduce_deduplicate.q,\ + remote_script.q,\ + root_dir_external_table.q,\ + schemeAuthority.q,\ + schemeAuthority2.q,\ + scriptfile1.q,\ + scriptfile1_win.q,\ + temp_table_external.q,\ + truncate_column_buckets.q,\ + uber_reduce.q,\ + vector_inner_join.q,\ + vector_outer_join0.q,\ + vector_outer_join1.q,\ + vector_outer_join2.q,\ + vector_outer_join3.q,\ + vector_outer_join4.q,\ + vector_outer_join5.q + +# These tests are removed from miniSparkOnYarn.query.files +# ql_rewrite_gbtoidx.q,\ +# ql_rewrite_gbtoidx_cbo_1.q,\ +# smb_mapjoin_8.q,\ + + 
+spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\ + groupby2_multi_distinct.q,\ + groupby3_map_skew_multi_distinct.q,\ + groupby3_multi_distinct.q,\ + groupby_grouping_sets7.q http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/Context.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index 748c19a..f5befaf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -50,6 +50,8 @@ import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; +import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; @@ -71,7 +73,7 @@ public class Context { private int resDirFilesNum; boolean initialized; String originalTracker = null; - private final CompilationOpContext opContext; + private CompilationOpContext opContext; private final Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<String, ContentSummary>(); // scratch path to use for all non-local (ie. hdfs) file system tmp folders @@ -88,10 +90,9 @@ public class Context { private final Configuration conf; protected int pathid = 10000; - protected boolean explain = false; + protected ExplainConfiguration explainConfig = null; protected String cboInfo; protected boolean cboSucceeded; - protected boolean explainLogical = false; protected String cmd = ""; // number of previous attempts protected int tryCount = 0; @@ -274,34 +275,25 @@ public class Context { } /** - * Set the context on whether the current query is an explain query. - * @param value true if the query is an explain query, false if not + * Find whether we should execute the current query due to explain + * @return true if the query needs to be executed, false if not */ - public void setExplain(boolean value) { - explain = value; - } - - /** - * Find whether the current query is an explain query - * @return true if the query is an explain query, false if not - */ - public boolean getExplain() { - return explain; + public boolean isExplainSkipExecution() { + return (explainConfig != null && explainConfig.getAnalyze() != AnalyzeState.RUNNING); } /** * Find whether the current query is a logical explain query */ public boolean getExplainLogical() { - return explainLogical; + return explainConfig != null && explainConfig.isLogical(); } - /** - * Set the context on whether the current query is a logical - * explain query. 
- */ - public void setExplainLogical(boolean explainLogical) { - this.explainLogical = explainLogical; + public AnalyzeState getExplainAnalyze() { + if (explainConfig != null) { + return explainConfig.getAnalyze(); + } + return null; } /** @@ -448,7 +440,7 @@ public class Context { // if we are executing entirely on the client side - then // just (re)use the local scratch directory if(isLocalOnlyExecutionMode()) { - return getLocalScratchDir(!explain); + return getLocalScratchDir(!isExplainSkipExecution()); } try { @@ -456,7 +448,7 @@ public class Context { URI uri = dir.toUri(); Path newScratchDir = getScratchDir(uri.getScheme(), uri.getAuthority(), - !explain, uri.getPath()); + !isExplainSkipExecution(), uri.getPath()); LOG.info("New scratch dir is " + newScratchDir); return newScratchDir; } catch (IOException e) { @@ -468,7 +460,7 @@ public class Context { } private Path getExternalScratchDir(URI extURI) { - return getStagingDir(new Path(extURI.getScheme(), extURI.getAuthority(), extURI.getPath()), !explain); + return getStagingDir(new Path(extURI.getScheme(), extURI.getAuthority(), extURI.getPath()), !isExplainSkipExecution()); } /** @@ -531,7 +523,7 @@ public class Context { } public Path getMRTmpPath(URI uri) { - return new Path(getStagingDir(new Path(uri), !explain), MR_PREFIX + nextPathId()); + return new Path(getStagingDir(new Path(uri), !isExplainSkipExecution()), MR_PREFIX + nextPathId()); } /** @@ -577,7 +569,7 @@ public class Context { * path within /tmp */ public Path getExtTmpPathRelTo(Path path) { - return new Path(getStagingDir(path, !explain), EXT_PREFIX + nextPathId()); + return new Path(getStagingDir(path, !isExplainSkipExecution()), EXT_PREFIX + nextPathId()); } /** @@ -725,7 +717,7 @@ public class Context { * the stream being used */ public void setTokenRewriteStream(TokenRewriteStream tokenRewriteStream) { - assert (this.tokenRewriteStream == null); + assert (this.tokenRewriteStream == null || this.getExplainAnalyze() == AnalyzeState.RUNNING); this.tokenRewriteStream = tokenRewriteStream; } @@ -908,4 +900,17 @@ public class Context { public void setIsUpdateDeleteMerge(boolean isUpdate) { this.isUpdateDeleteMerge = isUpdate; } + + public ExplainConfiguration getExplainConfig() { + return explainConfig; + } + + public void setExplainConfig(ExplainConfiguration explainConfig) { + this.explainConfig = explainConfig; + } + + public void resetOpContext(){ + opContext = new CompilationOpContext(); + sequencer = new AtomicInteger(); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig new file mode 100644 index 0000000..4667f68 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java.orig @@ -0,0 +1,829 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql; + +import java.io.DataInput; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +import org.antlr.runtime.TokenRewriteStream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.BlobStorageUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.TaskRunner; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager.Heartbeater; +import org.apache.hadoop.hive.ql.lockmgr.HiveLock; +import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj; +import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; +import org.apache.hadoop.hive.ql.lockmgr.LockException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.LoadTableDesc; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Context for Semantic Analyzers. Usage: not reusable - construct a new one for + * each query should call clear() at end of use to remove temporary folders + */ +public class Context { + private boolean isHDFSCleanup; + private Path resFile; + private Path resDir; + private FileSystem resFs; + private static final Logger LOG = LoggerFactory.getLogger("hive.ql.Context"); + private Path[] resDirPaths; + private int resDirFilesNum; + boolean initialized; + String originalTracker = null; + private final CompilationOpContext opContext; + private final Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<String, ContentSummary>(); + + // scratch path to use for all non-local (ie. 
hdfs) file system tmp folders + private final Path nonLocalScratchPath; + + // scratch directory to use for local file system tmp folders + private final String localScratchDir; + + // the permission to scratch directory (local and hdfs) + private final String scratchDirPermission; + + // Keeps track of scratch directories created for different scheme/authority + private final Map<String, Path> fsScratchDirs = new HashMap<String, Path>(); + + private final Configuration conf; + protected int pathid = 10000; + protected boolean explain = false; + protected String cboInfo; + protected boolean cboSucceeded; + protected boolean explainLogical = false; + protected String cmd = ""; + // number of previous attempts + protected int tryCount = 0; + private TokenRewriteStream tokenRewriteStream; + + private String executionId; + + // List of Locks for this query + protected List<HiveLock> hiveLocks; + + // Transaction manager for this query + protected HiveTxnManager hiveTxnManager; + + // Used to track what type of acid operation (insert, update, or delete) we are doing. Useful + // since we want to change where bucket columns are accessed in some operators and + // optimizations when doing updates and deletes. + private AcidUtils.Operation acidOperation = AcidUtils.Operation.NOT_ACID; + + private boolean needLockMgr; + + private AtomicInteger sequencer = new AtomicInteger(); + + private final Map<String, Table> cteTables = new HashMap<String, Table>(); + + // Keep track of the mapping from load table desc to the output and the lock + private final Map<LoadTableDesc, WriteEntity> loadTableOutputMap = + new HashMap<LoadTableDesc, WriteEntity>(); + private final Map<WriteEntity, List<HiveLockObj>> outputLockObjects = + new HashMap<WriteEntity, List<HiveLockObj>>(); + + private final String stagingDir; + + private Heartbeater heartbeater; + + private boolean skipTableMasking; + + public Context(Configuration conf) throws IOException { + this(conf, generateExecutionId()); + } + + /** + * Create a Context with a given executionId. ExecutionId, together with + * user name and conf, will determine the temporary directory locations. + */ + public Context(Configuration conf, String executionId) { + this.conf = conf; + this.executionId = executionId; + + // local & non-local tmp location is configurable. however it is the same across + // all external file systems + nonLocalScratchPath = new Path(SessionState.getHDFSSessionPath(conf), executionId); + localScratchDir = new Path(SessionState.getLocalSessionPath(conf), executionId).toUri().getPath(); + scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION); + stagingDir = HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR); + opContext = new CompilationOpContext(); + } + + + public Map<LoadTableDesc, WriteEntity> getLoadTableOutputMap() { + return loadTableOutputMap; + } + + public Map<WriteEntity, List<HiveLockObj>> getOutputLockObjects() { + return outputLockObjects; + } + + /** + * Set the context on whether the current query is an explain query. 
+ * @param value true if the query is an explain query, false if not + */ + public void setExplain(boolean value) { + explain = value; + } + + /** + * Find whether the current query is an explain query + * @return true if the query is an explain query, false if not + */ + public boolean getExplain() { + return explain; + } + + /** + * Find whether the current query is a logical explain query + */ + public boolean getExplainLogical() { + return explainLogical; + } + + /** + * Set the context on whether the current query is a logical + * explain query. + */ + public void setExplainLogical(boolean explainLogical) { + this.explainLogical = explainLogical; + } + + /** + * Set the original query command. + * @param cmd the original query command string + */ + public void setCmd(String cmd) { + this.cmd = cmd; + } + + /** + * Find the original query command. + * @return the original query command string + */ + public String getCmd () { + return cmd; + } + + /** + * Gets a temporary staging directory related to a path. + * If a path already contains a staging directory, then returns the current directory; otherwise + * create the directory if needed. + * + * @param inputPath URI of the temporary directory + * @param mkdir Create the directory if True. + * @return A temporary path. + */ + private Path getStagingDir(Path inputPath, boolean mkdir) { + final URI inputPathUri = inputPath.toUri(); + final String inputPathName = inputPathUri.getPath(); + final String fileSystem = inputPathUri.getScheme() + ":" + inputPathUri.getAuthority(); + final FileSystem fs; + + try { + fs = inputPath.getFileSystem(conf); + } catch (IOException e) { + throw new IllegalStateException("Error getting FileSystem for " + inputPath + ": "+ e, e); + } + + String stagingPathName; + if (inputPathName.indexOf(stagingDir) == -1) { + stagingPathName = new Path(inputPathName, stagingDir).toString(); + } else { + stagingPathName = inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length()); + } + + final String key = fileSystem + "-" + stagingPathName + "-" + TaskRunner.getTaskRunnerID(); + + Path dir = fsScratchDirs.get(key); + if (dir == null) { + // Append task specific info to stagingPathName, instead of creating a sub-directory. + // This way we don't have to worry about deleting the stagingPathName separately at + // end of query execution. 
+      dir = fs.makeQualified(new Path(stagingPathName + "_" + this.executionId + "-" + TaskRunner.getTaskRunnerID()));
+
+      LOG.debug("Created staging dir = " + dir + " for path = " + inputPath);
+
+      if (mkdir) {
+        try {
+          boolean inheritPerms = HiveConf.getBoolVar(conf,
+              HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
+          if (!FileUtils.mkdir(fs, dir, inheritPerms, conf)) {
+            throw new IllegalStateException("Cannot create staging directory '" + dir.toString() + "'");
+          }
+
+          if (isHDFSCleanup) {
+            fs.deleteOnExit(dir);
+          }
+        } catch (IOException e) {
+          throw new RuntimeException("Cannot create staging directory '" + dir.toString() + "': " + e.getMessage(), e);
+        }
+      }
+
+      fsScratchDirs.put(key, dir);
+    }
+
+    return dir;
+  }
+
+  /**
+   * Get a tmp directory on the specified URI
+   *
+   * @param scheme Scheme of the target FS
+   * @param authority Authority of the target FS
+   * @param mkdir create the directory if true
+   * @param scratchDir path of tmp directory
+   */
+  private Path getScratchDir(String scheme, String authority,
+      boolean mkdir, String scratchDir) {
+
+    String fileSystem = scheme + ":" + authority;
+    Path dir = fsScratchDirs.get(fileSystem + "-" + TaskRunner.getTaskRunnerID());
+
+    if (dir == null) {
+      Path dirPath = new Path(scheme, authority,
+          scratchDir + "-" + TaskRunner.getTaskRunnerID());
+      if (mkdir) {
+        try {
+          FileSystem fs = dirPath.getFileSystem(conf);
+          dirPath = new Path(fs.makeQualified(dirPath).toString());
+          FsPermission fsPermission = new FsPermission(scratchDirPermission);
+
+          if (!fs.mkdirs(dirPath, fsPermission)) {
+            throw new RuntimeException("Cannot make directory: "
+                + dirPath.toString());
+          }
+          if (isHDFSCleanup) {
+            fs.deleteOnExit(dirPath);
+          }
+        } catch (IOException e) {
+          throw new RuntimeException (e);
+        }
+      }
+      dir = dirPath;
+      fsScratchDirs.put(fileSystem + "-" + TaskRunner.getTaskRunnerID(), dir);
+
+    }
+
+    return dir;
+  }
+
+
+  /**
+   * Create a local scratch directory on demand and return it.
+   */
+  public Path getLocalScratchDir(boolean mkdir) {
+    try {
+      FileSystem fs = FileSystem.getLocal(conf);
+      URI uri = fs.getUri();
+      return getScratchDir(uri.getScheme(), uri.getAuthority(),
+          mkdir, localScratchDir);
+    } catch (IOException e) {
+      throw new RuntimeException (e);
+    }
+  }
+
+
+  /**
+   * Create a map-reduce scratch directory on demand and return it.
+   *
+   */
+  public Path getMRScratchDir() {
+
+    // if we are executing entirely on the client side - then
+    // just (re)use the local scratch directory
+    if(isLocalOnlyExecutionMode()) {
+      return getLocalScratchDir(!explain);
+    }
+
+    try {
+      Path dir = FileUtils.makeQualified(nonLocalScratchPath, conf);
+      URI uri = dir.toUri();
+
+      Path newScratchDir = getScratchDir(uri.getScheme(), uri.getAuthority(),
+          !explain, uri.getPath());
+      LOG.info("New scratch dir is " + newScratchDir);
+      return newScratchDir;
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    } catch (IllegalArgumentException e) {
+      throw new RuntimeException("Error while making MR scratch "
+          + "directory - check filesystem config (" + e.getCause() + ")", e);
+    }
+  }
+
+  /**
+   * Create a temporary directory depending on the path specified.
+   * - If path is an Object store filesystem, then use the default MR scratch directory (HDFS)
+   * - If path is on HDFS, then create a staging directory inside the path
+   *
+   * @param path Path used to verify the Filesystem to use for temporary directory
+   * @return A path to the new temporary directory
+   */
+  public Path getTempDirForPath(Path path) {
+    boolean isLocal = isPathLocal(path);
+    if ((BlobStorageUtils.isBlobStoragePath(conf, path) && !BlobStorageUtils.isBlobStorageAsScratchDir(conf))
+        || isLocal) {
+      // For better write performance, we use HDFS for temporary data when object store is used.
+      // Note that the scratch directory configuration variable must use HDFS or any other non-blobstorage system
+      // to take advantage of this performance.
+      return getMRTmpPath();
+    } else {
+      return getExtTmpPathRelTo(path);
+    }
+  }
+
+  /*
+   * Checks if the path is for the local filesystem or not
+   */
+  private boolean isPathLocal(Path path) {
+    boolean isLocal = false;
+    if (path != null) {
+      String scheme = path.toUri().getScheme();
+      if (scheme != null) {
+        isLocal = scheme.equals(Utilities.HADOOP_LOCAL_FS_SCHEME);
+      }
+    }
+    return isLocal;
+  }
+
+  private Path getExternalScratchDir(URI extURI) {
+    return getStagingDir(new Path(extURI.getScheme(), extURI.getAuthority(), extURI.getPath()), !explain);
+  }
+
+  /**
+   * Remove any created scratch directories.
+   */
+  public void removeScratchDir() {
+    for (Map.Entry<String, Path> entry : fsScratchDirs.entrySet()) {
+      try {
+        Path p = entry.getValue();
+        FileSystem fs = p.getFileSystem(conf);
+        fs.delete(p, true);
+        fs.cancelDeleteOnExit(p);
+      } catch (Exception e) {
+        LOG.warn("Error Removing Scratch: "
+            + StringUtils.stringifyException(e));
+      }
+    }
+    fsScratchDirs.clear();
+  }
+
+  /**
+   * Remove any created directories for CTEs.
+   */
+  public void removeMaterializedCTEs() {
+    // clean CTE tables
+    for (Table materializedTable : cteTables.values()) {
+      Path location = materializedTable.getDataLocation();
+      try {
+        FileSystem fs = location.getFileSystem(conf);
+        boolean status = fs.delete(location, true);
+        LOG.info("Removed " + location + " for materialized "
+            + materializedTable.getTableName() + ", status=" + status);
+      } catch (IOException e) {
+        // ignore
+        LOG.warn("Error removing " + location + " for materialized " + materializedTable.getTableName()
+            + ": " + StringUtils.stringifyException(e));
+      }
+    }
+    cteTables.clear();
+  }
+
+  private String nextPathId() {
+    return Integer.toString(pathid++);
+  }
+
+
+  private static final String MR_PREFIX = "-mr-";
+  private static final String EXT_PREFIX = "-ext-";
+  private static final String LOCAL_PREFIX = "-local-";
+
+  /**
+   * Check if path is for intermediate data
+   * @return true if a URI is a temporary URI for map-reduce intermediate data,
+   *         false otherwise
+   */
+  public boolean isMRTmpFileURI(String uriStr) {
+    return (uriStr.indexOf(executionId) != -1) &&
+        (uriStr.indexOf(MR_PREFIX) != -1);
+  }
+
+  public Path getMRTmpPath(URI uri) {
+    return new Path(getStagingDir(new Path(uri), !explain), MR_PREFIX + nextPathId());
+  }
+
+  /**
+   * Get a path to store map-reduce intermediate data in.
+   *
+   * @return next available path for map-red intermediate data
+   */
+  public Path getMRTmpPath() {
+    return new Path(getMRScratchDir(), MR_PREFIX
+        + nextPathId());
+  }
+
+  /**
+   * Get a tmp path on local host to store intermediate data.
+   *
+   * @return next available tmp path on local fs
+   */
+  public Path getLocalTmpPath() {
+    return new Path(getLocalScratchDir(true), LOCAL_PREFIX + nextPathId());
+  }
+
+  /**
+   * Get a path to store tmp data destined for external Path.
+   *
+   * @param path external Path to which the tmp data has to be eventually moved
+   * @return next available tmp path on the file system corresponding to extURI
+   */
+  public Path getExternalTmpPath(Path path) {
+    URI extURI = path.toUri();
+    if (extURI.getScheme().equals("viewfs")) {
+      // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
+      // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
+      // on same namespace as tbl dir.
+      return getExtTmpPathRelTo(path.getParent());
+    }
+    return new Path(getExternalScratchDir(extURI), EXT_PREFIX
+        + nextPathId());
+  }
+
+  /**
+   * This is similar to getExternalTmpPath() with the difference being that this method returns a temp path
+   * within the passed-in URI, whereas getExternalTmpPath() ignores the passed-in path and returns a temp
+   * path within /tmp
+   */
+  public Path getExtTmpPathRelTo(Path path) {
+    return new Path(getStagingDir(path, !explain), EXT_PREFIX + nextPathId());
+  }
+
+  /**
+   * @return the resFile
+   */
+  public Path getResFile() {
+    return resFile;
+  }
+
+  /**
+   * @param resFile
+   *          the resFile to set
+   */
+  public void setResFile(Path resFile) {
+    this.resFile = resFile;
+    resDir = null;
+    resDirPaths = null;
+    resDirFilesNum = 0;
+  }
+
+  /**
+   * @return the resDir
+   */
+  public Path getResDir() {
+    return resDir;
+  }
+
+  /**
+   * @param resDir
+   *          the resDir to set
+   */
+  public void setResDir(Path resDir) {
+    this.resDir = resDir;
+    resFile = null;
+
+    resDirFilesNum = 0;
+    resDirPaths = null;
+  }
+
+  public void clear() throws IOException {
+    if (resDir != null) {
+      try {
+        FileSystem fs = resDir.getFileSystem(conf);
+        fs.delete(resDir, true);
+      } catch (IOException e) {
+        LOG.info("Context clear error: " + StringUtils.stringifyException(e));
+      }
+    }
+
+    if (resFile != null) {
+      try {
+        FileSystem fs = resFile.getFileSystem(conf);
+        fs.delete(resFile, false);
+      } catch (IOException e) {
+        LOG.info("Context clear error: " + StringUtils.stringifyException(e));
+      }
+    }
+    removeMaterializedCTEs();
+    removeScratchDir();
+    originalTracker = null;
+    setNeedLockMgr(false);
+  }
+
+  public DataInput getStream() {
+    try {
+      if (!initialized) {
+        initialized = true;
+        if ((resFile == null) && (resDir == null)) {
+          return null;
+        }
+
+        if (resFile != null) {
+          return resFile.getFileSystem(conf).open(resFile);
+        }
+
+        resFs = resDir.getFileSystem(conf);
+        FileStatus status = resFs.getFileStatus(resDir);
+        assert status.isDir();
+        FileStatus[] resDirFS = resFs.globStatus(new Path(resDir + "/*"), FileUtils.HIDDEN_FILES_PATH_FILTER);
+        resDirPaths = new Path[resDirFS.length];
+        int pos = 0;
+        for (FileStatus resFS : resDirFS) {
+          if (!resFS.isDir()) {
+            resDirPaths[pos++] = resFS.getPath();
+          }
+        }
+        if (pos == 0) {
+          return null;
+        }
+
+        return resFs.open(resDirPaths[resDirFilesNum++]);
+      } else {
+        return getNextStream();
+      }
+    } catch (FileNotFoundException e) {
+      LOG.info("getStream error: " + StringUtils.stringifyException(e));
+      return null;
+    } catch (IOException e) {
+      LOG.info("getStream error: " + StringUtils.stringifyException(e));
+      return null;
+    }
+  }
+
+  private DataInput getNextStream() {
+    try {
+      if (resDir != null && resDirFilesNum < resDirPaths.length
+          && (resDirPaths[resDirFilesNum] != null)) {
+        return resFs.open(resDirPaths[resDirFilesNum++]);
+      }
+    } catch (FileNotFoundException e) {
+      LOG.info("getNextStream error: " + StringUtils.stringifyException(e));
+      return null;
+    } catch (IOException e) {
+      LOG.info("getNextStream error: " + StringUtils.stringifyException(e));
+      return null;
+    }
+
+    return null;
+  }
+
+  public void resetStream() {
+    if (initialized) {
+      resDirFilesNum = 0;
+      initialized = false;
+    }
+  }
+
+  /**
+   * Little abbreviation for StringUtils.
+   */
+  private static boolean strEquals(String str1, String str2) {
+    return org.apache.commons.lang.StringUtils.equals(str1, str2);
+  }
+
+  /**
+   * Set the token rewrite stream being used to parse the current top-level SQL
+   * statement. Note that this should <b>not</b> be used for other parsing
+   * activities; for example, when we encounter a reference to a view, we switch
+   * to a new stream for parsing the stored view definition from the catalog,
+   * but we don't clobber the top-level stream in the context.
+   *
+   * @param tokenRewriteStream
+   *          the stream being used
+   */
+  public void setTokenRewriteStream(TokenRewriteStream tokenRewriteStream) {
+    assert (this.tokenRewriteStream == null);
+    this.tokenRewriteStream = tokenRewriteStream;
+  }
+
+  /**
+   * @return the token rewrite stream being used to parse the current top-level
+   *         SQL statement, or null if it isn't available (e.g. for parser
+   *         tests)
+   */
+  public TokenRewriteStream getTokenRewriteStream() {
+    return tokenRewriteStream;
+  }
+
+  /**
+   * Generate a unique executionId. An executionId, together with user name and
+   * the configuration, will determine the temporary locations of all intermediate
+   * files.
+   *
+   * In the future, users can use the executionId to resume a query.
+   */
+  public static String generateExecutionId() {
+    Random rand = new Random();
+    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS");
+    String executionId = "hive_" + format.format(new Date()) + "_"
+        + Math.abs(rand.nextLong());
+    return executionId;
+  }
+
+  /**
+   * Does Hive want to run tasks entirely on the local machine
+   * (where the query is being compiled)?
+   *
+   * Today this translates into running hadoop jobs locally
+   */
+  public boolean isLocalOnlyExecutionMode() {
+    // Always allow spark to run in a cluster mode. Without this, depending on
+    // user's local hadoop settings, true may be returned, which causes plan to be
+    // stored in local path.
+    if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+      return false;
+    }
+
+    return ShimLoader.getHadoopShims().isLocalMode(conf);
+  }
+
+  public List<HiveLock> getHiveLocks() {
+    return hiveLocks;
+  }
+
+  public void setHiveLocks(List<HiveLock> hiveLocks) {
+    this.hiveLocks = hiveLocks;
+  }
+
+  public HiveTxnManager getHiveTxnManager() {
+    return hiveTxnManager;
+  }
+
+  public void setHiveTxnManager(HiveTxnManager txnMgr) {
+    hiveTxnManager = txnMgr;
+  }
+
+  public void setOriginalTracker(String originalTracker) {
+    this.originalTracker = originalTracker;
+  }
+
+  public void restoreOriginalTracker() {
+    if (originalTracker != null) {
+      ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, originalTracker);
+      originalTracker = null;
+    }
+  }
+
+  public void addCS(String path, ContentSummary cs) {
+    pathToCS.put(path, cs);
+  }
+
+  public ContentSummary getCS(Path path) {
+    return getCS(path.toString());
+  }
+
+  public ContentSummary getCS(String path) {
+    return pathToCS.get(path);
+  }
+
+  public Map<String, ContentSummary> getPathToCS() {
+    return pathToCS;
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  /**
+   * @return the isHDFSCleanup
+   */
+  public boolean isHDFSCleanup() {
+    return isHDFSCleanup;
+  }
+
+  /**
+   * @param isHDFSCleanup the isHDFSCleanup to set
+   */
+  public void setHDFSCleanup(boolean isHDFSCleanup) {
+    this.isHDFSCleanup = isHDFSCleanup;
+  }
+
+  public boolean isNeedLockMgr() {
+    return needLockMgr;
+  }
+
+  public void setNeedLockMgr(boolean needLockMgr) {
+    this.needLockMgr = needLockMgr;
+  }
+
+  public int getTryCount() {
+    return tryCount;
+  }
+
+  public void setTryCount(int tryCount) {
+    this.tryCount = tryCount;
+  }
+
+  public void setAcidOperation(AcidUtils.Operation op) {
+    acidOperation = op;
+  }
+
+  public AcidUtils.Operation getAcidOperation() {
+    return acidOperation;
+  }
+
+  public String getCboInfo() {
+    return cboInfo;
+  }
+
+  public void setCboInfo(String cboInfo) {
+    this.cboInfo = cboInfo;
+  }
+
+  public boolean isCboSucceeded() {
+    return cboSucceeded;
+  }
+
+  public void setCboSucceeded(boolean cboSucceeded) {
+    this.cboSucceeded = cboSucceeded;
+  }
+
+  public Table getMaterializedTable(String cteName) {
+    return cteTables.get(cteName);
+  }
+
+  public void addMaterializedTable(String cteName, Table table) {
+    cteTables.put(cteName, table);
+  }
+
+  public AtomicInteger getSequencer() {
+    return sequencer;
+  }
+
+  public CompilationOpContext getOpContext() {
+    return opContext;
+  }
+
+  public Heartbeater getHeartbeater() {
+    return heartbeater;
+  }
+
+  public void setHeartbeater(Heartbeater heartbeater) {
+    this.heartbeater = heartbeater;
+  }
+
+  public void checkHeartbeaterLockException() throws LockException {
+    if (getHeartbeater() != null && getHeartbeater().getLockException() != null) {
+      throw getHeartbeater().getLockException();
+    }
+  }
+
+  public boolean isSkipTableMasking() {
+    return skipTableMasking;
+  }
+
+  public void setSkipTableMasking(boolean skipTableMasking) {
+    this.skipTableMasking = skipTableMasking;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index d6f30e4..08bd040 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -82,6 +82,7 @@ import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
+import org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
@@ -92,6 +93,7 @@ import org.apache.hadoop.hive.ql.parse.ParseUtils;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.HiveOperation;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -324,6 +326,11 @@ public class Driver implements CommandProcessor {
     this(new QueryState(conf), null);
   }
 
+  public Driver(HiveConf conf, Context ctx) {
+    this(new QueryState(conf), null);
+    this.ctx = ctx;
+  }
+
   public Driver(HiveConf conf, String userName) {
     this(new QueryState(conf), userName);
   }
@@ -388,14 +395,13 @@ public class Driver implements CommandProcessor {
       LOG.warn("WARNING! Query command could not be redacted." + e);
     }
 
-    if (ctx != null) {
-      closeInProcess(false);
+    if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) {
+      close();
     }
-
     if (isInterrupted()) {
       return handleInterruption("at beginning of compilation."); //indicate if need clean resource
     }
-
+
     if (resetTaskIds) {
       TaskFactory.resetId();
     }
@@ -435,7 +441,10 @@ public class Driver implements CommandProcessor {
       return handleInterruption("before parsing and analysing the query");
     }
 
-    ctx = new Context(conf);
+    if (ctx == null) {
+      ctx = new Context(conf);
+    }
+
     ctx.setTryCount(getTryCount());
     ctx.setCmd(command);
     ctx.setHDFSCleanup(true);

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index a183b9b..a899964 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -407,6 +408,9 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
 
   @Override
   public int execute(DriverContext driverContext) {
+    if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
+      return 0;
+    }
     try {
       Hive db = getHive();
       return persistColumnStats(db);

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
index 42992bb..9510058 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
@@ -956,6 +956,7 @@ public abstract class CommonJoinOperator<T extends JoinDesc> extends
       }
     }
     Arrays.fill(storage, null);
+    super.closeOp(abort);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 2e04d1d..92859fe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -140,6 +140,7 @@ import org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.PreInsertTableDesc;
 import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
 import org.apache.hadoop.hive.ql.plan.AbortTxnsDesc;
 import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
 import org.apache.hadoop.hive.ql.plan.AlterDatabaseDesc;
@@ -292,6 +293,9 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
 
   @Override
   public int execute(DriverContext driverContext) {
+    if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
+      return 0;
+    }
 
     // Create the db
     Hive db;

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 403d57c..fb39188 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -271,7 +271,7 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
       // Because of the implementation of the JsonParserFactory, we are sure
       // that we can get a TezJsonParser.
       JsonParser jsonParser = JsonParserFactory.getParser(conf);
-      work.setFormatted(true);
+      work.getConfig().setFormatted(true);
       JSONObject jsonPlan = getJSONPlan(out, work);
       if (work.getCboInfo() != null) {
         jsonPlan.put("cboInfo", work.getCboInfo());
@@ -282,8 +282,8 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
         // if there is anything wrong happen, we bail out.
         LOG.error("Running explain user level has problem: " + e.toString()
             + ". Falling back to normal explain");
-        work.setFormatted(false);
-        work.setUserLevelExplain(false);
+        work.getConfig().setFormatted(false);
+        work.getConfig().setUserLevelExplain(false);
         jsonPlan = getJSONPlan(out, work);
       }
     } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index fbb4d6e..5f9ed4b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -651,6 +651,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
 
   @Override
   public void process(Object row, int tag) throws HiveException {
+    runTimeNumRows++;
     /* Create list bucketing sub-directory only if stored-as-directories is on. */
     String lbDirName = null;
     lbDirName = (lbCtx == null) ? null : generateListBucketingDirName(row);
@@ -1082,6 +1083,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
       }
     }
     fsp = prevFsp = null;
+    super.closeOp(abort);
   }
 
   /**
@@ -1226,7 +1228,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
         }
       }
     }
-      sContext.setIndexForTezUnion(this.conf.getIndexInTezUnion());
+      sContext.setIndexForTezUnion(this.getIndexForTezUnion());
       if (!statsPublisher.closeConnection(sContext)) {
         // The original exception is lost.
         // Not changing the interface to maintain backward compatibility

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
index ef168fa..7c40b58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
@@ -1028,7 +1028,6 @@ public class GroupByOperator extends Operator<GroupByDesc> {
    * @throws HiveException
    */
   private void forward(Object[] keys, AggregationBuffer[] aggs) throws HiveException {
-
     if (forwardCache == null) {
       forwardCache = new Object[outputKeyLength + aggs.length];
     }
@@ -1123,6 +1122,7 @@ public class GroupByOperator extends Operator<GroupByDesc> {
       }
     }
     hashAggregations = null;
+    super.closeOp(abort);
   }
 
   // Group by contains the columns needed - no need to aggregate from children

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
index 3a6fdd5..0282763 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
@@ -20,9 +20,7 @@ package org.apache.hadoop.hive.ql.exec;
 
 import java.io.IOException;
 import java.io.Serializable;
-import java.util.Collection;
 import java.util.List;
-import java.util.concurrent.Future;
 
 import org.slf4j.Logger;
 import org.apache.hadoop.conf.Configuration;

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
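The ExplainTask hunk above reflects a refactor visible elsewhere in this patch: explain options such as formatted and userLevelExplain move onto the new ExplainConfiguration object that ExplainWork exposes through getConfig(). A minimal sketch of that shape, using simplified stand-in classes rather than the actual Hive ones:

    // Simplified stand-ins for the shape of the refactor; not the actual Hive classes.
    class SketchExplainConfig {
      private boolean formatted;
      private boolean userLevelExplain;

      boolean isFormatted() { return formatted; }
      void setFormatted(boolean formatted) { this.formatted = formatted; }

      boolean isUserLevelExplain() { return userLevelExplain; }
      void setUserLevelExplain(boolean userLevelExplain) { this.userLevelExplain = userLevelExplain; }
    }

    class SketchExplainWork {
      private final SketchExplainConfig config = new SketchExplainConfig();

      SketchExplainConfig getConfig() {
        return config;
      }
    }

With this shape, call sites change mechanically from work.setFormatted(true) to work.getConfig().setFormatted(true), which is the only behavioral content of the two ExplainTask hunks above.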
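The operator hunks above and below share one runtime-statistics pattern introduced for EXPLAIN ANALYZE: process()-style methods increment a runTimeNumRows counter, and each overridden closeOp() now chains to super.closeOp(abort) so the base operator class can record the row count actually observed. The self-contained sketch below only illustrates that pattern; the classes are stand-ins, and recordRuntimeRows() is a hypothetical placeholder for whatever the real base class does with the counter.

    // Simplified stand-in for the pattern in these hunks; not the actual Hive classes.
    abstract class SketchOperator {
      protected long runTimeNumRows = 0;            // rows observed at run time

      protected void closeOp(boolean abort) {
        // the base closeOp() hands the counter to the runtime-stats machinery;
        // recordRuntimeRows() is a hypothetical placeholder for that step
        recordRuntimeRows(runTimeNumRows);
      }

      protected abstract void recordRuntimeRows(long rows);
    }

    class SketchSinkOperator extends SketchOperator {
      void process(Object row, int tag) {
        runTimeNumRows++;                           // same idea as FileSinkOperator.process()
        // ... actual row handling ...
      }

      @Override
      protected void closeOp(boolean abort) {
        // operator-specific cleanup would run first; chaining to the base class is
        // why the patch adds super.closeOp(abort) to each overridden closeOp()
        super.closeOp(abort);
      }

      @Override
      protected void recordRuntimeRows(long rows) {
        System.out.println("rows processed: " + rows);
      }
    }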
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
index 9676d70..d4ebbd4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
@@ -88,6 +88,7 @@ public class LimitOperator extends Operator<LimitDesc> implements Serializable {
     if (!isMap && currCount < leastRow) {
       throw new HiveException("No sufficient row found");
     }
+    super.closeOp(abort);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java
index 9bf363c..0633854 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ListSinkOperator.java
@@ -93,6 +93,7 @@ public class ListSinkOperator extends Operator<ListSinkDesc> {
     try {
       res.add(fetcher.convert(row, inputObjInspectors[0]));
       numRows++;
+      runTimeNumRows++;
     } catch (Exception e) {
       throw new HiveException(e);
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/831bd7d8/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index 671b851..e1381be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -58,6 +58,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol;
 import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
 import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc;
@@ -248,6 +249,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
 
   public int execute(DriverContext driverContext) {
     try {
+      if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
+        return 0;
+      }
       Hive db = getHive();
 
       // Do any hive related operations like moving tables and files
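For side-effecting tasks (ColumnStatsTask and DDLTask above, MoveTask here), the patch adds the same guard at the top of execute(): when the Context reports that an EXPLAIN ANALYZE run is in progress, the task returns success without doing its work, since the statement is only being re-executed to gather runtime statistics. A self-contained sketch of that guard, using simplified stand-ins for the Hive types and a hypothetical doRealWork():

    // Simplified stand-ins; AnalyzeState here mirrors only the RUNNING value used by the guard.
    enum AnalyzeState { NONE, RUNNING }

    class SketchContext {
      private final AnalyzeState explainAnalyze;
      SketchContext(AnalyzeState state) { this.explainAnalyze = state; }
      AnalyzeState getExplainAnalyze() { return explainAnalyze; }
    }

    class SketchSideEffectTask {
      int execute(SketchContext ctx) {
        if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
          // EXPLAIN ANALYZE re-runs the query only to collect runtime statistics,
          // so skip the side effect and report success (0).
          return 0;
        }
        return doRealWork();
      }

      private int doRealWork() {
        // hypothetical placeholder for the task body (persist stats, run DDL, move files)
        return 0;
      }
    }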
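Taken together with the new Driver(HiveConf, Context) constructor and the if (ctx == null) change in Driver.compile(), these hunks appear to support a two-pass flow: execute the query once while per-operator row counts accumulate, then present the plan again with the observed counts attached. The toy program below only illustrates that idea; the class and field names are invented for the example and do not correspond to Hive classes.

    import java.util.HashMap;
    import java.util.Map;

    // Toy illustration of the EXPLAIN ANALYZE idea: print a plan whose estimated
    // row counts are annotated with the counts observed during execution.
    public class ExplainAnalyzeSketch {
      static class PlanNode {
        final String name;
        final long estimatedRows;
        PlanNode(String name, long estimatedRows) {
          this.name = name;
          this.estimatedRows = estimatedRows;
        }
      }

      public static void main(String[] args) {
        PlanNode[] plan = {
            new PlanNode("TableScan", 1000),
            new PlanNode("Filter", 500),
            new PlanNode("GroupBy", 50)
        };

        // Pretend the first execution pass recorded these per-operator row counts.
        Map<String, Long> actualRows = new HashMap<>();
        actualRows.put("TableScan", 1000L);
        actualRows.put("Filter", 732L);
        actualRows.put("GroupBy", 41L);

        // Second pass: re-print the plan with estimated vs. observed counts,
        // roughly what annotating runtime statistics onto a plan amounts to.
        for (PlanNode node : plan) {
          System.out.println(node.name + " (estimated rows=" + node.estimatedRows
              + ", observed rows=" + actualRows.get(node.name) + ")");
        }
      }
    }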