Repository: hive Updated Branches: refs/heads/master 24ce099bd -> 5ec65ee28
HIVE-10460: change the key of Parquet Record to NullWritable instead of Void (Ferdinand Xu, reviewed by Sergio Pena) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5ec65ee2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5ec65ee2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5ec65ee2 Branch: refs/heads/master Commit: 5ec65ee2847ad1c92b92c33b5446ba046b4a80ef Parents: 24ce099 Author: Sergio Pena <sergio.p...@cloudera.com> Authored: Tue Apr 28 10:10:50 2015 -0700 Committer: Sergio Pena <sergio.p...@cloudera.com> Committed: Tue Apr 28 10:10:50 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java | 7 ++++--- .../hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java | 7 ++++--- .../hive/ql/io/parquet/VectorizedParquetInputFormat.java | 2 +- .../hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java | 7 ++++--- .../hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java | 7 ++++--- .../hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java | 5 +++-- 6 files changed, 20 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/5ec65ee2/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java index 0391229..f4f0f07 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; import 
org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport; import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.RecordReader; @@ -34,7 +35,7 @@ import parquet.hadoop.ParquetInputFormat; * NOTE: With HIVE-9235 we removed "implements VectorizedParquetInputFormat" since all data types * are not currently supported. Removing the interface turns off vectorization. */ -public class MapredParquetInputFormat extends FileInputFormat<Void, ArrayWritable> { +public class MapredParquetInputFormat extends FileInputFormat<NullWritable, ArrayWritable> { private static final Log LOG = LogFactory.getLog(MapredParquetInputFormat.class); @@ -53,7 +54,7 @@ public class MapredParquetInputFormat extends FileInputFormat<Void, ArrayWritabl @SuppressWarnings({ "unchecked", "rawtypes" }) @Override - public org.apache.hadoop.mapred.RecordReader<Void, ArrayWritable> getRecordReader( + public org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> getRecordReader( final org.apache.hadoop.mapred.InputSplit split, final org.apache.hadoop.mapred.JobConf job, final org.apache.hadoop.mapred.Reporter reporter @@ -69,7 +70,7 @@ public class MapredParquetInputFormat extends FileInputFormat<Void, ArrayWritabl if (LOG.isDebugEnabled()) { LOG.debug("Using row-mode record reader"); } - return (RecordReader<Void, ArrayWritable>) + return (RecordReader<NullWritable, ArrayWritable>) new ParquetRecordReaderWrapper(realInput, split, job, reporter); } } catch (final InterruptedException e) { http://git-wip-us.apache.org/repos/asf/hive/blob/5ec65ee2/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java index 8380117..c6fb26c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobConf; @@ -46,8 +47,8 @@ import parquet.hadoop.ParquetOutputFormat; * A Parquet OutputFormat for Hive (with the deprecated package mapred) * */ -public class MapredParquetOutputFormat extends FileOutputFormat<Void, ParquetHiveRecord> implements - HiveOutputFormat<Void, ParquetHiveRecord> { +public class MapredParquetOutputFormat extends FileOutputFormat<NullWritable, ParquetHiveRecord> + implements HiveOutputFormat<NullWritable, ParquetHiveRecord> { private static final Log LOG = LogFactory.getLog(MapredParquetOutputFormat.class); @@ -67,7 +68,7 @@ public class MapredParquetOutputFormat extends FileOutputFormat<Void, ParquetHiv } @Override - public RecordWriter<Void, ParquetHiveRecord> getRecordWriter( + public RecordWriter<NullWritable, ParquetHiveRecord> getRecordWriter( final FileSystem ignored, final JobConf job, final String name, http://git-wip-us.apache.org/repos/asf/hive/blob/5ec65ee2/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java index 3bb3eb1..843e079 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java @@ -53,7 +53,7 @@ public class VectorizedParquetInputFormat extends FileInputFormat<NullWritable, private final ParquetRecordReaderWrapper internalReader; private VectorizedRowBatchCtx rbCtx; private ArrayWritable internalValues; - private Void internalKey; + private NullWritable internalKey; private VectorColumnAssign[] assigners; public VectorizedParquetRecordReader( http://git-wip-us.apache.org/repos/asf/hive/blob/5ec65ee2/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java index 0858fbd..5c36564 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; @@ -59,7 +60,7 @@ import parquet.schema.MessageTypeParser; import com.google.common.base.Strings; -public class ParquetRecordReaderWrapper implements RecordReader<Void, ArrayWritable> { +public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, ArrayWritable> { public static final Log LOG = LogFactory.getLog(ParquetRecordReaderWrapper.class); private final long splitLen; // for getPos() @@ -168,7 +169,7 @@ 
public class ParquetRecordReaderWrapper implements RecordReader<Void, ArrayWrit } @Override - public Void createKey() { + public NullWritable createKey() { return null; } @@ -196,7 +197,7 @@ public class ParquetRecordReaderWrapper implements RecordReader<Void, ArrayWrit } @Override - public boolean next(final Void key, final ArrayWritable value) throws IOException { + public boolean next(final NullWritable key, final ArrayWritable value) throws IOException { if (eof) { return false; } http://git-wip-us.apache.org/repos/asf/hive/blob/5ec65ee2/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java index 0d32e49..b7987a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/ParquetRecordWriterWrapper.java @@ -20,6 +20,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.JobContext; @@ -35,12 +36,12 @@ import parquet.hadoop.ParquetOutputFormat; import parquet.hadoop.metadata.CompressionCodecName; import parquet.hadoop.util.ContextUtil; -public class ParquetRecordWriterWrapper implements RecordWriter<Void, ParquetHiveRecord>, +public class ParquetRecordWriterWrapper implements RecordWriter<NullWritable, ParquetHiveRecord>, org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter { public static final Log LOG = LogFactory.getLog(ParquetRecordWriterWrapper.class); - private final 
org.apache.hadoop.mapreduce.RecordWriter<Void, ParquetHiveRecord> realWriter; + private final org.apache.hadoop.mapreduce.RecordWriter<NullWritable, ParquetHiveRecord> realWriter; private final TaskAttemptContext taskContext; public ParquetRecordWriterWrapper( @@ -106,7 +107,7 @@ public class ParquetRecordWriterWrapper implements RecordWriter<Void, ParquetHiv } @Override - public void write(final Void key, final ParquetHiveRecord value) throws IOException { + public void write(final NullWritable key, final ParquetHiveRecord value) throws IOException { try { realWriter.write(key, value); } catch (final InterruptedException e) { http://git-wip-us.apache.org/repos/asf/hive/blob/5ec65ee2/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java index 3a47673..127a093 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java @@ -15,6 +15,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.JobConf; @@ -122,12 +123,12 @@ public abstract class AbstractTestParquetDirect { public static List<ArrayWritable> read(Path parquetFile) throws IOException { List<ArrayWritable> records = new ArrayList<ArrayWritable>(); - RecordReader<Void, ArrayWritable> reader = new MapredParquetInputFormat(). + RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat(). 
getRecordReader(new FileSplit( parquetFile, 0, fileLength(parquetFile), (String[]) null), new JobConf(), null); - Void alwaysNull = reader.createKey(); + NullWritable alwaysNull = reader.createKey(); ArrayWritable record = reader.createValue(); while (reader.next(alwaysNull, record)) { records.add(record);