This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push:
     new 1a29d46  - Fix realtime queries by removing COLUMN_ID and COLUMN_NAME cache in inputformat (#814)
1a29d46 is described below

commit 1a29d46a5707d58a33f170ce6877ebbe814715e2
Author: n3nash <nagar...@uber.com>
AuthorDate: Fri Aug 2 16:06:34 2019 -0700

    - Fix realtime queries by removing COLUMN_ID and COLUMN_NAME cache in inputformat (#814)

    - Hive on Spark will NOT work for RT tables after this patch
---
 .../hadoop/realtime/HoodieRealtimeInputFormat.java | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java
index 1426373..9fede56 100644
--- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java
+++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java
@@ -69,15 +69,10 @@ public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Conf
   public static final int HOODIE_COMMIT_TIME_COL_POS = 0;
   public static final int HOODIE_RECORD_KEY_COL_POS = 2;
   public static final int HOODIE_PARTITION_PATH_COL_POS = 3;
-  // Track the read column ids and names to be used throughout the execution and lifetime of this task
-  // Needed for Hive on Spark. Our theory is that due to
+  // Hive on Spark queries do not work with RT tables. Our theory is that due to
   // {@link org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher}
   // not handling empty list correctly, the ParquetRecordReaderWrapper ends up adding the same column ids multiple
   // times which ultimately breaks the query.
-  // TODO : Find why RO view works fine but RT doesn't, JIRA: https://issues.apache.org/jira/browse/HUDI-151
-  public static String READ_COLUMN_IDS;
-  public static String READ_COLUMN_NAMES;
-  public static boolean isReadColumnsSet = false;

   @Override
   public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
@@ -208,11 +203,6 @@ public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Conf
         HOODIE_COMMIT_TIME_COL_POS);
     configuration = addProjectionField(configuration, HoodieRecord.PARTITION_PATH_METADATA_FIELD,
         HOODIE_PARTITION_PATH_COL_POS);
-    if (!isReadColumnsSet) {
-      READ_COLUMN_IDS = configuration.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
-      READ_COLUMN_NAMES = configuration.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
-      isReadColumnsSet = true;
-    }
     return configuration;
   }

@@ -240,10 +230,6 @@ public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Conf
         "HoodieRealtimeRecordReader can only work on HoodieRealtimeFileSplit and not with " + split);
-    // Reset the original column ids and names
-    job.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, READ_COLUMN_IDS);
-    job.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, READ_COLUMN_NAMES);
-
     return new HoodieRealtimeRecordReader((HoodieRealtimeFileSplit) split, job,
         super.getRecordReader(split, job, reporter));
   }
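
For context: the deleted code cached the first task's Hive column-projection settings (the values behind ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR and READ_COLUMN_NAMES_CONF_STR) in static fields and re-applied them in getRecordReader. The sketch below is a minimal, hypothetical illustration of why a per-JVM static cache of per-query configuration is fragile, using only plain Hadoop Configuration; class and method names are made up for illustration and this is not the Hudi code or the confirmed root cause of the HUDI-151 behavior.

import org.apache.hadoop.conf.Configuration;

// Hypothetical sketch of the pattern the patch removes: static fields capture
// the projection of whichever task runs first in the JVM, and every later task
// has that stale projection forced back onto its own configuration.
public class StaticProjectionCacheSketch {
  // Hive's column-projection keys (assumed string values of
  // ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR / READ_COLUMN_NAMES_CONF_STR).
  static final String READ_COLUMN_IDS = "hive.io.file.readcolumn.ids";
  static final String READ_COLUMN_NAMES = "hive.io.file.readcolumn.names";

  // Per-JVM cache, mirroring the removed static fields.
  static String cachedIds;
  static String cachedNames;
  static boolean cached = false;

  static void rememberProjection(Configuration conf) {
    if (!cached) {              // only the FIRST query's projection is remembered
      cachedIds = conf.get(READ_COLUMN_IDS);
      cachedNames = conf.get(READ_COLUMN_NAMES);
      cached = true;
    }
  }

  static void beforeRecordReader(Configuration conf) {
    // Later queries in the same JVM get the first query's columns re-applied,
    // even if they project a completely different set of columns.
    conf.set(READ_COLUMN_IDS, cachedIds);
    conf.set(READ_COLUMN_NAMES, cachedNames);
  }

  public static void main(String[] args) {
    Configuration queryA = new Configuration(false);
    queryA.set(READ_COLUMN_IDS, "0,2,3");
    queryA.set(READ_COLUMN_NAMES, "_hoodie_commit_time,_hoodie_record_key,_hoodie_partition_path");
    rememberProjection(queryA);

    Configuration queryB = new Configuration(false);
    queryB.set(READ_COLUMN_IDS, "5,7");          // a different projection
    queryB.set(READ_COLUMN_NAMES, "fare,rider");
    rememberProjection(queryB);                  // no-op: cache already populated
    beforeRecordReader(queryB);

    // Prints "0,2,3" -- query B now reads query A's columns.
    System.out.println(queryB.get(READ_COLUMN_IDS));
  }
}
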