My steps:
I defined a class "public class myInputFormat extends TextInputFormat
implements JobConfigurable" to specify the input format.

hive> add jar /home/biadmin/hiveudf/myFileFormat.jar;
Added /home/biadmin/hiveudf/myFileFormat.jar to class path
Added resource: /home/biadmin/hiveudf/myFileFormat.jar

hive> list jars;
/home/biadmin/hiveudf/myFileFormat.jar

hive> create table IOtable(str1 string, str2 string, str3 string) stored as
INPUTFORMAT 'com.mytest.fileformat.myInputFormat' OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat' ;
OK
Time taken: 0.081 seconds

hive> load data local inpath '/home/biadmin/hivetbl/IOtable_data.txt' into
table IOtable;
Copying data from file:/home/biadmin/hivetbl/IOtable_data.txt
Copying file: file:/home/biadmin/hivetbl/IOtable_data.txt
Loading data to table default.iotable
OK
Time taken: 0.147 seconds

hive>  select * from IOtable;
OK
Failed with exception java.io.IOException:java.io.IOException: Cannot
create an instance of InputFormat class com.mytest.fileformat.myInputFormat
as specified in mapredWork!
Time taken: 0.059 seconds




*Here is my source code:*
===============================
package com.mytest.fileformat;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.LineRecordReader;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.TextInputFormat;

/**
 * Text input format whose record reader rewrites every input line before
 * handing it to the caller: the line is lower-cased and each literal
 * {@code $$$$} marker is replaced with the Ctrl-A character ({@code \001}).
 *
 * <p>Splitting and all other behaviour are inherited from
 * {@link TextInputFormat}; only the record reader is customised.
 */
@SuppressWarnings("deprecation")
public class myInputFormat extends TextInputFormat implements
JobConfigurable {

    /** Job configuration captured by the most recent {@link #configure} call. */
    JobConf job;

    /** Public no-argument constructor so the class can be instantiated by name. */
    public myInputFormat() {
    }

    /**
     * Remembers the job configuration and configures the inherited
     * {@link TextInputFormat} state.
     *
     * <p>The superclass is configured directly (rather than a separate
     * delegate instance) so that inherited methods invoked on this object
     * operate on configured state.
     *
     * @param job the job configuration
     */
    @Override
    public void configure(JobConf job) {
        this.job = job;
        super.configure(job);
    }

    /**
     * Creates the line-rewriting record reader for one split.
     *
     * @param genericSplit the split to read; must be a {@link FileSplit}
     * @param job          the job configuration
     * @param reporter     receives the split description as its status
     * @return a reader yielding one rewritten line per record
     * @throws IOException if the underlying line reader cannot be opened
     */
    @Override
    public RecordReader<LongWritable, Text> getRecordReader(
            InputSplit genericSplit, JobConf job, Reporter reporter)
            throws IOException {
        reporter.setStatus(genericSplit.toString());
        return new myLineRecordReader(job, (FileSplit) genericSplit);
    }

    /**
     * Record reader that wraps a {@link LineRecordReader} and rewrites each
     * line it produces (lower-casing plus {@code $$$$} to {@code \001}
     * substitution).
     */
    public static class myLineRecordReader implements
            RecordReader<LongWritable, Text> {

        /** Literal marker in the raw input that separates fields. */
        private static final String FIELD_MARKER = "$$$$";

        /**
         * Replacement for {@link #FIELD_MARKER}: Ctrl-A, presumably chosen
         * because it is the default field delimiter of Hive text tables.
         */
        private static final String FIELD_DELIMITER = "\001";

        LineRecordReader lineReader;
        LongWritable lineKey;
        Text lineValue;

        /**
         * Opens a line reader over the given split and allocates the scratch
         * key/value objects it reads into.
         *
         * @param job   the job configuration
         * @param split the file split to read
         * @throws IOException if the split cannot be opened
         */
        public myLineRecordReader(JobConf job, FileSplit split)
                throws IOException {
            lineReader = new LineRecordReader(job, split);
            lineKey = lineReader.createKey();
            lineValue = lineReader.createValue();
        }

        /**
         * Reads the next line, rewrites it, and stores the result in the
         * caller-supplied key and value.
         *
         * @param key   receives the key reported by the wrapped reader
         * @param value receives the lower-cased line with every
         *              {@code $$$$} replaced by {@code \001}
         * @return {@code true} if a record was read, {@code false} at end
         *         of the split
         * @throws IOException if the underlying read fails
         */
        @Override
        public boolean next(LongWritable key, Text value) throws IOException {
            if (!lineReader.next(lineKey, lineValue)) {
                // no more data
                return false;
            }
            // Propagate the key: the wrapped reader filled our scratch key,
            // not the object the caller passed in.
            key.set(lineKey.get());
            // Locale.ROOT keeps the lower-casing independent of the JVM's
            // default locale.
            String rewritten = lineValue.toString()
                    .toLowerCase(java.util.Locale.ROOT)
                    .replace(FIELD_MARKER, FIELD_DELIMITER);
            value.set(rewritten);
            return true;
        }

        /** @return a fresh key object from the wrapped reader */
        @Override
        public LongWritable createKey() {
            return lineReader.createKey();
        }

        /** @return a fresh value object from the wrapped reader */
        @Override
        public Text createValue() {
            return lineReader.createValue();
        }

        /** @return the wrapped reader's current position */
        @Override
        public long getPos() throws IOException {
            return lineReader.getPos();
        }

        /** @return the wrapped reader's progress */
        @Override
        public float getProgress() throws IOException {
            return lineReader.getProgress();
        }

        /** Closes the wrapped reader. */
        @Override
        public void close() throws IOException {
            lineReader.close();
        }
    }
}

Reply via email to