[ https://issues.apache.org/jira/browse/HIVE-14792?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16296208#comment-16296208 ]
Hive QA commented on HIVE-14792: -------------------------------- Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12902717/HIVE-14792.3.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 87 failed/errored test(s), 11528 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[acid_subquery] (batchId=39) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[alter_table_column_stats] (batchId=64) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[alter_table_stats_status] (batchId=54) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[analyze_tbl_date] (batchId=34) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_1] (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_6] (batchId=46) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_view_3] (batchId=34) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[authorization_view_4] (batchId=8) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_1] (batchId=22) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_2] (batchId=83) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[autoColumnStats_3] (batchId=55) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_join25] (batchId=72) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[columnStatsUpdateForStatsOptimizer_2] (batchId=29) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[columnstats_infinity] (batchId=76) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[create_like_tbl_props] (batchId=73) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[create_table_like_stats] (batchId=59) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[deleteAnalyze] (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[delete_all_partitioned] (batchId=28) 
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[delete_where_partitioned] (batchId=39) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[delete_whole_partition] (batchId=9) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[llap_acid] (batchId=80) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[llap_acid_fast] (batchId=39) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[mapjoin_hook] (batchId=12) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[outer_reference_windowed] (batchId=40) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[ppd_join5] (batchId=35) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[quotedid_stats] (batchId=22) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[stats14] (batchId=63) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[stats15] (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[stats9] (batchId=26) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[stats_invalidation] (batchId=75) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[temp_table_display_colstats_tbllvl] (batchId=77) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[union_stats] (batchId=23) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[update_all_partitioned] (batchId=51) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[update_where_partitioned] (batchId=62) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[llap_smb] (batchId=151) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[acid_no_buckets] (batchId=166) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[autoColumnStats_1] (batchId=156) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[autoColumnStats_2] (batchId=169) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[bucketsortoptimize_insert_2] (batchId=152) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[columnStatsUpdateForStatsOptimizer_1] (batchId=159) 
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[column_table_stats] (batchId=167) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[column_table_stats_orc] (batchId=153) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[deleteAnalyze] (batchId=158) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_all_partitioned] (batchId=157) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_where_partitioned] (batchId=160) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_whole_partition] (batchId=153) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[dynpart_sort_optimization_acid] (batchId=161) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[hybridgrace_hashjoin_2] (batchId=157) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[insert_values_orig_table_use_metadata] (batchId=165) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[llap_acid] (batchId=169) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[llap_acid_fast] (batchId=160) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[mergejoin] (batchId=165) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[schema_evol_orc_acid_part_update] (batchId=167) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[schema_evol_orc_acidvec_part_update] (batchId=154) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[sysdb] (batchId=160) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[union_stats] (batchId=156) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_all_partitioned] (batchId=163) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_where_partitioned] (batchId=165) org.apache.hadoop.hive.cli.TestMiniSparkOnYarnCliDriver.testCliDriver[bucketizedhiveinputformat] (batchId=178) 
org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_5] (batchId=102) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[authorization_fail_3] (batchId=92) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[authorization_fail_7] (batchId=92) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[authorization_part] (batchId=93) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[authorization_view_2] (batchId=93) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[authorization_view_6] (batchId=93) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[authorization_view_7] (batchId=93) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[authorization_view_disable_cbo_2] (batchId=92) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[auto_sortmerge_join_10] (batchId=138) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[bucketsortoptimize_insert_7] (batchId=128) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[ppd_join5] (batchId=120) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats14] (batchId=133) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats15] (batchId=110) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[stats9] (batchId=116) org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver[subquery_multi] (batchId=113) org.apache.hadoop.hive.cli.control.TestDanglingQOuts.checkDanglingQOut (batchId=209) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testMerge3Way01 (batchId=291) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testMerge3Way02 (batchId=291) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testMergePartitioned01 (batchId=291) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testMergePartitioned02 (batchId=291) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testWriteSetTracking10 (batchId=291) 
org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testWriteSetTracking11 (batchId=291) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testWriteSetTracking3 (batchId=291) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testWriteSetTracking5 (batchId=291) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testWriteSetTracking7 (batchId=291) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testWriteSetTracking8 (batchId=291) org.apache.hadoop.hive.ql.lockmgr.TestDbTxnManager2.testWriteSetTracking9 (batchId=291) org.apache.hadoop.hive.ql.parse.TestReplicationScenarios.testConstraints (batchId=226) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/8310/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/8310/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-8310/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.YetusPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 87 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12902717 - PreCommit-HIVE-Build > AvroSerde reads the remote schema-file at least once per mapper, per table > reference. > ------------------------------------------------------------------------------------- > > Key: HIVE-14792 > URL: https://issues.apache.org/jira/browse/HIVE-14792 > Project: Hive > Issue Type: Bug > Affects Versions: 1.2.1, 2.1.0 > Reporter: Mithun Radhakrishnan > Assignee: Mithun Radhakrishnan > Labels: TODOC2.2, TODOC2.4 > Fix For: 3.0.0, 2.4.0, 2.2.1 > > Attachments: HIVE-14792.1.patch, HIVE-14792.3.patch > > > Avro tables that use "external" schema files stored on HDFS can cause > excessive calls to {{FileSystem::open()}}, especially for queries that spawn > large numbers of mappers. 
> This is because of the following code in {{AvroSerDe::initialize()}}: > {code:title=AvroSerDe.java|borderStyle=solid} > public void initialize(Configuration configuration, Properties properties) > throws SerDeException { > // ... > if (hasExternalSchema(properties) > || columnNameProperty == null || columnNameProperty.isEmpty() > || columnTypeProperty == null || columnTypeProperty.isEmpty()) { > schema = determineSchemaOrReturnErrorSchema(configuration, properties); > } else { > // Get column names and sort order > columnNames = Arrays.asList(columnNameProperty.split(",")); > columnTypes = > TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); > schema = getSchemaFromCols(properties, columnNames, columnTypes, > columnCommentProperty); > > properties.setProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), > schema.toString()); > } > // ... > } > {code} > For tables using {{avro.schema.url}}, every time the SerDe is initialized > (i.e. at least once per mapper), the schema file is read remotely. For > queries with thousands of mappers, this leads to a stampede to the handful > (3?) of datanodes that host the schema-file. In the best case, this causes > slowdowns. > It would be preferable to distribute the Avro-schema to all mappers as part > of the job-conf. The alternatives aren't exactly appealing: > # One can't rely solely on the {{column.list.types}} stored in the Hive > metastore (HIVE-14789). > # {{avro.schema.literal}} might not always be usable, because of the > size-limit on table-parameters. The typical size of the Avro-schema file is > between 0.5-3MB, in my limited experience. Bumping the max table-parameter > size isn't a great solution. > If the schema file referenced by {{avro.schema.url}} were read during query-planning, and made > available as part of table-properties (but not serialized into the > metastore), the downstream logic would remain largely intact. I have a patch > that does this. 
-- This message was sent by Atlassian JIRA (v6.4.14#64029)