Github user rmetzger commented on a diff in the pull request:
https://github.com/apache/flink/pull/411#discussion_r24833839
--- Diff:
flink-staging/flink-hcatalog/src/main/java/org/apache/flink/hcatalog/HCatInputFormatBase.java
---
@@ -0,0 +1,413 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.hcatalog;
+
+import org.apache.flink.api.common.io.InputFormat;
+import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
+import org.apache.flink.api.common.io.statistics.BaseStatistics;
+import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
+import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.java.hadoop.mapreduce.utils.HadoopUtils;
+import org.apache.flink.api.java.hadoop.mapreduce.wrapper.HadoopInputSplit;
+import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
+import org.apache.flink.api.java.typeutils.TupleTypeInfo;
+import org.apache.flink.api.java.typeutils.WritableTypeInfo;
+import org.apache.flink.core.io.InputSplitAssigner;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A InputFormat to read from HCatalog tables.
+ * The InputFormat supports projection (selection and order of fields) and
partition filters.
+ *
+ * Data can be returned as {@link
org.apache.hive.hcatalog.data.HCatRecord} or Flink {@link
org.apache.flink.api.java.tuple.Tuple}.
+ * Flink Tuples are only supported for primitive type fields
+ * (no STRUCT, ARRAY, or MAP data types) and have a size limitation.
+ *
+ * @param <T> The type of the records returned by the InputFormat.
+ */
+public abstract class HCatInputFormatBase<T> implements InputFormat<T, HadoopInputSplit>, ResultTypeQueryable<T> {
+
+ private static final long serialVersionUID = 1L;
+
+ private Configuration configuration;
+
+ private org.apache.hive.hcatalog.mapreduce.HCatInputFormat
hCatInputFormat;
+ private RecordReader<WritableComparable, HCatRecord> recordReader;
+ private boolean fetched = false;
+ private boolean hasNext;
+
+ protected String[] fieldNames = new String[0];
+ protected HCatSchema outputSchema;
+
+ private TypeInformation<T> resultType;
+
+ public HCatInputFormatBase() { }
+
+ /**
+* Creates a HCatInputFormat for the given database and table.
+* By default, the InputFormat returns {@link
org.apache.hive.hcatalog.data.HCatRecord}.
+* The return type of the InputFormat can be changed to Flink {@link
org.apache.flink.api.java.tuple.Tuple} by calling
+* {@link HCatInputFormatBase#asFlinkTuples()}.
+*
+* @param database The name of the database to read from.
+* @param table The name of the table to read.
+* @throws java.io.IOException
+*/
+ public HCatInputFormatBase(String database, String table) throws
IOException {
+ this(database, table, new Configuration());
+ }
+
+ /**
+* Creates a HCatInputFormat for the given database, table, and
+* {@link org.apache.hadoop.conf.Configuration}.
+* By default, the InputFormat returns {@link
org