Hi,

I have a SequenceFile which contains several JPEG images stored as (image name, 
image bytes) key-value pairs. My objective is to count the number of images, 
grouped by their source (the camera model), something like this:

Nikon Coolpix  100
Sony Cybershot 251
N82 100


The MR code is :

package com.hadoop.basics;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.drew.imaging.ImageMetadataReader;
import com.drew.imaging.ImageProcessingException;
import com.drew.metadata.Directory;
import com.drew.metadata.Metadata;
import com.drew.metadata.exif.ExifIFD0Directory;

public class ImageSummary extends Configured implements Tool {

            public static class ImageSourceMapper extends
                                    Mapper<Text, BytesWritable, Text, 
IntWritable> {

                        private static int tagId = 272;
                        private static final IntWritable one = new 
IntWritable(1);

                        public void map(Text imageName, BytesWritable 
imageBytes,
                                                Context context) throws 
IOException, InterruptedException {
                                    // TODO Auto-generated method stub

                                    System.out.println("In the map method, 
image is "
                                                            + 
imageName.toString());

                                    byte[] imageInBytes = imageBytes.getBytes();
                                    ByteArrayInputStream bais = new 
ByteArrayInputStream(imageInBytes);
                                    BufferedInputStream bis = new 
BufferedInputStream(bais);

                                    Metadata imageMD = null;

                                    try {
                                                imageMD = 
ImageMetadataReader.readMetadata(bis, true);
                                    } catch (ImageProcessingException e) {
                                                // TODO Auto-generated catch 
block
                                                System.out.println("Got an 
ImageProcessingException !");
                                                e.printStackTrace();
                                    }

                                    Directory exifIFD0Directory = imageMD
                                                            
.getDirectory(ExifIFD0Directory.class);

                                    String imageSource = 
exifIFD0Directory.getString(tagId);

                                    System.out.println(imageName.toString() + " 
is taken using "
                                                            + imageSource);

                                    context.write(new Text(imageSource), one);

                                    System.out.println("Returning from the map 
method");
                        }
            }

            public static class ImageSourceReducer extends
                                    Reducer<Text, IntWritable, Text, 
IntWritable> {

                        public void reduce(Text imageSource, 
Iterator<IntWritable> counts,
                                                Context context) throws 
IOException, InterruptedException {
                                    // TODO Auto-generated method stub

                                    System.out.println("In the reduce method");

                                    int finalCount = 0;

                                    while (counts.hasNext()) {
                                                finalCount += 
counts.next().get();
                                    }

                                    context.write(imageSource, new 
IntWritable(finalCount));

                                    System.out.println("Returning from the 
reduce method");
                        }

            }

            public static void main(String[] args) throws Exception {
                        ToolRunner.run(new ImageSummary(), args);
            }

            @Override
            public int run(String[] args) throws Exception {
                        // TODO Auto-generated method stub

                        System.out.println("In ImageSummary.run(...)");

                        Configuration configuration = getConf();

                        Job job = new Job(configuration, "Image_Source");
                        job.setJarByClass(getClass());

                        job.setInputFormatClass(SequenceFileInputFormat.class);
                        job.setOutputFormatClass(TextOutputFormat.class);

                        job.setMapperClass(ImageSourceMapper.class);
                        job.setCombinerClass(ImageSourceReducer.class);
                        job.setReducerClass(ImageSourceReducer.class);

                        job.setMapOutputKeyClass(Text.class);
                        job.setMapOutputValueClass(IntWritable.class);

                        job.setOutputKeyClass(Text.class);
                        job.setOutputValueClass(IntWritable.class);

                        SequenceFileInputFormat.addInputPath(job, new 
Path(args[0]));
                        TextOutputFormat.setOutputPath(job, new Path(args[1]));

                        System.out.println("Submitting job");

                        job.waitForCompletion(true);

                        int jobStatus = job.isSuccessful() ? 0 : -1;

                        System.out.println("Returning jobStatus = " + 
jobStatus);

                        return jobStatus;
            }
}


The command :

hadoop jar /home/hduser/dumphere/codes/hadoop/imageops.jar 
com.hadoop.basics.ImageSummary "/scratchpad/imageOps/WholeImageSeqFile" 
"/scratchpad/imageOps/cnt"


The part-file (/scratchpad/imageOps/cnt/part-r-00000) contains:
COOLPIX L120             1
COOLPIX L120             1
K750i 1

The mapper stdout logs :

stdout logs
In the map method, image is It's a long road....JPG
It's a long road....JPG is taken using COOLPIX L120
Returning from the map method
In the map method, image is Every man is a mountainside....JPG
Every man is a mountainside....JPG is taken using COOLPIX L120
Returning from the map method
In the map method, image is mystic.JPG
mystic.JPG is taken using K750i
Returning from the map method
But nothing appears in the stdout logs of the reducer.
What have I missed?

Regards,
Omkar Joshi



________________________________
The contents of this e-mail and any attachment(s) may contain confidential or 
privileged information for the intended recipient(s). Unintended recipients are 
prohibited from taking action on the basis of information in this e-mail and 
using or disseminating the information, and must notify the sender and delete 
it from their system. L&T Infotech will not accept responsibility or liability 
for the accuracy or completeness of, or the presence of any virus or disabling 
code in this e-mail"

Reply via email to