On Tue, Jun 3, 2008 at 6:17 AM, Lin Guo <[EMAIL PROTECTED]> wrote:
> I am wondering whether it is possible to deserialize the keys and values in a 
> hadoop output file where the output format is SequenceFileOutputFormat.

I wrote some code to do this, samples attached.
-Stuart
/* SeqFilePrinter.java - print the keys and values in a SequenceFile
 *
 * Copyright (C) 2008 Stuart Sierra
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

import java.nio.charset.Charset;

/* From hadoop-*-core.jar, http://hadoop.apache.org/
 * Developed with Hadoop 0.16.3. */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.BytesWritable;


/** Prints the contents of a SequenceFile.
 *
 * <p>Each record's key and value are written to standard output.
 * Values stored as BytesWritable are decoded as UTF-8 text; values of
 * any other type are rendered with their toString() method.
 *
 * @author Stuart Sierra, http://stuartsierra.com/
 */
public class SeqFilePrinter {

    /** Charset used to turn BytesWritable payloads into text.  It is
     * named explicitly so that the printed output does not depend on
     * the default encoding of the machine running the tool. */
    private static final Charset VALUE_CHARSET = Charset.forName("UTF-8");

    /** Line printed before every record. */
    private static final String RECORD_SEPARATOR =
        "============================================================";

    private String inputFile;
    private final LocalSetup setup;

    /** Creates a printer backed by a new LocalSetup.
     *
     * @throws Exception if the LocalSetup cannot be created
     */
    public SeqFilePrinter() throws Exception {
        setup = new LocalSetup();
    }

    /** Set the name of the input sequence file.
     *
     * @param filename   a local path string
     */
    public void setInput(String filename) {
        inputFile = filename;
    }

    /** Runs the process.  The key and value class names and every
     * record are printed to standard output; compression details are
     * printed to standard error.  The reader is always closed, even
     * if reading fails part-way through.
     *
     * @throws Exception if the file cannot be opened or read, or if
     *         the key or value class cannot be instantiated
     */
    public void execute() throws Exception {
        Path path = new Path(inputFile);
        SequenceFile.Reader reader =
            new SequenceFile.Reader(setup.getLocalFileSystem(), path, setup.getConf());

        try {
            printHeader(reader);

            /* One key object and one value object are allocated up
             * front and refilled by every call to reader.next(). */
            Writable key = (Writable)(reader.getKeyClass().newInstance());
            Writable val = (Writable)(reader.getValueClass().newInstance());
            while (reader.next(key, val)) {
                System.out.println(RECORD_SEPARATOR);
                System.out.println("KEY:\t" + key.toString());
                System.out.println("VALUE:\n" + render(val));
            }
        } finally {
            reader.close();
        }
    }

    /** Prints the key/value class names (standard output) and, for
     * compressed files, the compression details (standard error),
     * followed by an empty line on standard output. */
    private static void printHeader(SequenceFile.Reader reader) {
        System.out.println("Key type is " + reader.getKeyClassName());
        System.out.println("Value type is " + reader.getValueClassName());
        if (reader.isCompressed()) {
            System.err.println("Values are compressed.");
            if (reader.isBlockCompressed()) {
                System.err.println("Records are block-compressed.");
            }
            System.err.println("Compression type is " + reader.getCompressionCodec().getClass().getName());
        }
        System.out.println("");
    }

    /** Converts a record value to printable text.
     *
     * @param val  the value read from the sequence file
     * @return the UTF-8 decoding of the payload for a BytesWritable,
     *         otherwise the result of val.toString()
     */
    private static String render(Writable val) {
        if (val instanceof BytesWritable) {
            BytesWritable bytes = (BytesWritable)val;
            /* Only the first getSize() bytes of the array returned by
             * get() are decoded. */
            return new String(bytes.get(), 0, bytes.getSize(), VALUE_CHARSET);
        }
        return val.toString();
    }

    /** Command-line entry point.  Expects exactly one argument, the
     * sequence file to print.  On a wrong argument count, or on any
     * failure while printing, the usage text is shown and the process
     * exits with status 1 (a failure's stack trace is printed first).
     *
     * @param args  command-line arguments
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            exitWithHelp();
        }

        try {
            SeqFilePrinter me = new SeqFilePrinter();
            me.setInput(args[0]);
            me.execute();
        } catch (Exception e) {
            e.printStackTrace();
            exitWithHelp();
        }
    }

    /** Prints usage instructions to standard error and exits. */
    public static void exitWithHelp() {
        System.err.println("Usage: java SeqFilePrinter <sequence-file>\n" +
                           "Prints the contents of the sequence file.");
        System.exit(1);
    }
}
/* LocalSetup.java -- support for the Hadoop API outside of Hadoop
 *
 * Copyright (C) 2008 Stuart Sierra
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

/* From hadoop-*-core.jar, http://hadoop.apache.org/
 * Developed with Hadoop 0.16.3. */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

/** Provides Hadoop configuration and local file system objects for
 * other classes.  This is for situations where you want to use some
 * part of the Hadoop code outside of the Hadoop Map/Reduce
 * framework.
 *
 * @author Stuart Sierra, http://stuartsierra.com/
 */
public class LocalSetup {

    private final FileSystem fileSystem;
    private final Configuration config;

    /** Sets up Configuration and local FileSystem instances for
     * Hadoop.  Creates a new Configuration, registers the
     * implementation class for the "file" scheme, and obtains the
     * local file system from it.
     *
     * @throws Exception if the file system cannot be obtained
     * @throws IllegalStateException if the file system comes back
     *         without a configuration attached
     */
    public LocalSetup() throws Exception {
        config = new Configuration();

        /* Normally set in hadoop-default.xml, without it you get
         * "java.io.IOException: No FileSystem for scheme: file" */
        config.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");

        /* getLocal() rather than get(): get() hands back whatever the
         * configuration names as its default file system, which is
         * not necessarily the local one this class promises. */
        fileSystem = FileSystem.getLocal(config);
        if (fileSystem.getConf() == null) {
            /* This happens if the FileSystem is not properly
             * initialized, causes NullPointerException later. */
            throw new IllegalStateException("LocalFileSystem configuration is null");
        }
    }

    /** Returns the Hadoop Configuration instance created by the
     * constructor, for use in Hadoop API calls. */
    public Configuration getConf() {
        return config;
    }

    /** Returns the Hadoop FileSystem instance obtained by the
     * constructor, which provides access to the local filesystem. */
    public FileSystem getLocalFileSystem() {
        return fileSystem;
    }
}

Reply via email to